To run: Beforehand:

module load gcc

In R:

setwd("~/shared-gandalm/brain_CTP/Scripts/integration/pgs/pgs_analysis")
rmarkdown::render("pgs_analysis.Rmd", "html_document")

1 Set-up

1.1 Directories and libraries

library(data.table)
## data.table 1.14.2 using 6 threads (see ?getDTthreads).  Latest news: r-datatable.com
library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::between()   masks data.table::between()
## ✖ dplyr::filter()    masks stats::filter()
## ✖ dplyr::first()     masks data.table::first()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ dplyr::last()      masks data.table::last()
## ✖ purrr::transpose() masks data.table::transpose()
library(broom)
library(ggplot2)
library(wesanderson)
library(ggrepel)
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(DT)
asd_pgs_dir <- "~/shared-gandalm/brain_CTP/Data/integration/pgs/scores/ROSMAP_ASDbrain_LIBD_pgs.ASD_Grove2018_logOR.sbayesr.sscore"
azd_pgs_dir <- "~/shared-gandalm/brain_CTP/Data/integration/pgs/scores/ROSMAP_ASDbrain_LIBD_pgs.AZD_Marioni2018.sbayesr.sscore"
scz_pgs_dir <- "~/shared-gandalm/brain_CTP/Data/integration/pgs/scores/ROSMAP_ASDbrain_LIBD_pgs.SCZ_Pardinas2018.sbayesr.sscore"
#scz_pgs_dir <- "~/shared-gandalm/brain_CTP/Data/integration/pgs/scores/ROSMAP_ASDbrain_LIBD_pgs.SCZ_Pardinas2018_v2.sbayesr.sscore"
height_pgs_dir <- "~/shared-gandalm/brain_CTP/Data/integration/pgs/scores/ROSMAP_ASDbrain_LIBD_pgs.Height_Yengo2018.sbayesr.sscore"
mdd_pgs_dir <- "~/shared-gandalm/brain_CTP/Data/integration/pgs/scores/ROSMAP_ASDbrain_LIBD_pgs.MDD_Howard2019.sbayesr.sscore"
eduyears_pgs_dir <- "~/shared-gandalm/brain_CTP/Data/integration/pgs/scores/ROSMAP_ASDbrain_LIBD_pgs.EduYears_Lee2018.sbayesr.sscore"
chronotype_pgs_dir <- "~/shared-gandalm/brain_CTP/Data/integration/pgs/scores/ROSMAP_ASDbrain_LIBD_pgs.Chronotype_Jones2019.sbayesr.sscore"
iq_pgs_dir <- "~/shared-gandalm/brain_CTP/Data/integration/pgs/scores/ROSMAP_ASDbrain_LIBD_pgs.IQ_Savage2018.sbayesr.sscore"

pheno_dir <- "~/shared-gandalm/brain_CTP/Data/integration/ctp_differences/ctp_clr1e-3_aggregated_scz_asd_azd.txt"

out_dir <- "~/shared-gandalm/brain_CTP/Data/integration/pgs/analysis"

1.2 Read in data

asd_pgs <- read.delim(asd_pgs_dir, header = T, as.is = T)
azd_pgs <- read.delim(azd_pgs_dir, header = T, as.is = T)
scz_pgs <- read.delim(scz_pgs_dir, header = T, as.is = T)
height_pgs <- read.delim(height_pgs_dir, header = T, as.is = T)
mdd_pgs <- read.delim(mdd_pgs_dir, header = T, as.is = T)
eduyears_pgs <- read.delim(eduyears_pgs_dir, header = T, as.is = T)
#chronotype_pgs <- read.delim(chronotype_pgs_dir, header = T, as.is = T)
#iq_pgs <- read.delim(iq_pgs_dir, header = T, as.is = T)

pgs_trait_all <- TRUE

# Change depending on analysis
pgs_name <- c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "Height_PGS")
pgs_name <- c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "EduYears_PGS", "MDD_PGS", "Height_PGS")
#pgs_name <- c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "Height_PGS", "Chronotype_PGS", "IQ_PGS")

pheno <- read.delim(pheno_dir, header = T, as.is = T)
pheno$age2 <- pheno$age^2

asd_exclude <- read.delim("~/shared-gandalm/brain_CTP/Data/integration/ctp_differences/asd_exclude_studybalance.id", header = F, as.is = T)
scz_exclude <- read.delim("~/shared-gandalm/brain_CTP/Data/integration/ctp_differences/scz_exclude_studybalance.id", header = F, as.is = T)

1.3 Munge into single data frame

1.4 All PGS together

asd_pgs <- asd_pgs %>% dplyr::select(c("IID", "SCORE1_AVG"))
azd_pgs <- azd_pgs %>% dplyr::select(c("IID", "SCORE1_AVG"))
scz_pgs <- scz_pgs %>% dplyr::select(c("IID", "SCORE1_AVG"))
height_pgs <- height_pgs %>% dplyr::select(c("IID", "SCORE1_AVG"))
mdd_pgs <- mdd_pgs %>% dplyr::select(c("IID", "SCORE1_AVG"))
eduyears_pgs <- eduyears_pgs %>% dplyr::select(c("IID", "SCORE1_AVG"))
#chronotype_pgs <- chronotype_pgs %>% dplyr::select(c("IID", "SCORE1_AVG"))
#iq_pgs <- iq_pgs %>% dplyr::select(c("IID", "SCORE1_AVG"))

colnames(asd_pgs)[which(colnames(asd_pgs) == "SCORE1_AVG")] <- "asd_pgs_raw"
colnames(azd_pgs)[which(colnames(azd_pgs) == "SCORE1_AVG")] <- "azd_pgs_raw"
colnames(scz_pgs)[which(colnames(scz_pgs) == "SCORE1_AVG")] <- "scz_pgs_raw"
colnames(height_pgs)[which(colnames(height_pgs) == "SCORE1_AVG")] <- "height_pgs_raw"
colnames(mdd_pgs)[which(colnames(mdd_pgs) == "SCORE1_AVG")] <- "mdd_pgs_raw"
colnames(eduyears_pgs)[which(colnames(eduyears_pgs) == "SCORE1_AVG")] <- "eduyears_pgs_raw"
#colnames(chronotype_pgs)[which(colnames(chronotype_pgs) == "SCORE1_AVG")] <- "chronotype_pgs_raw"
#colnames(iq_pgs)[which(colnames(iq_pgs) == "SCORE1_AVG")] <- "iq_pgs_raw"

pgs <- inner_join(asd_pgs, azd_pgs, by = "IID")
pgs <- inner_join(pgs, scz_pgs, by = "IID")
pgs <- inner_join(pgs, height_pgs, by = "IID")

if (pgs_trait_all == TRUE) {
  pgs <- inner_join(pgs, mdd_pgs, by = "IID")
  pgs <- inner_join(pgs, eduyears_pgs, by = "IID")
  #pgs <- inner_join(pgs, chronotype_pgs, by = "IID")
  #pgs <- inner_join(pgs, iq_pgs, by = "IID")
}
colnames(pgs)[which(colnames(pgs) == "IID")] <- "genoid"

# becuase of temporary duplicated merged bfile
pgs <- pgs[!duplicated(pgs),]

1.5 Make a genoid file for PGS dataframe

rosmap_id <- read.delim("~/shared-gandalm/GenomicDatasets/ROSMAP/ROSMAP_metadata/ROSMAP_wgs_id_fordatalinkage.txt", header = T, as.is = T)
rosmap_genoid <- rosmap_id[,c("individualID", "specimenID")]
colnames(rosmap_genoid) <- c("IID", "genoid")

libd_id <- read.delim("~/shared-gandalm/GenomicDatasets/LIBD_phase2/methyl/Jaffe_methylation_DLPFC/pheno/GSE74193_series_matrix_pheno_keep_df.txt", header = T, as.is = T)
libd_genoid <- libd_id[,c("ID", "brnum")]
colnames(libd_genoid) <- c("IID", "genoid")

asdbrain_id <- read.csv("~/shared-gandalm/brain_CTP/Data/pheno/ASD_methylation_brain/Wong_ASD_Brain_Methylation_SuppTable1_pheno.csv")
asdbrain_genoid <- asdbrain_id[,c("SampleName", "BrainID")]
colnames(asdbrain_genoid) <- c("IID", "genoid")

# Combine together and save
genoid <- rbind(rosmap_genoid, libd_genoid, asdbrain_genoid)
write.table(genoid, paste(out_dir, "/ROSMAP_LIBD_ASDbrain_genoid_IID.id", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")

# Write table with genoid overlapping with pheno
genoid_ctp <- genoid[which(genoid$IID %in% pheno$IID),]
write.table(genoid_ctp[,c(2,2,1)], paste("~/shared-gandalm/brain_CTP/Data/genotyping/megaanalysis/ROSMAP_LIBD_ASDbrain_genoid_IID_brainCTP.id", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")

1.6 Join all files together into one (n=1056)

pgs_genoid <- inner_join(genoid, pgs, by = "genoid")
pgs_pheno <- inner_join(pheno, pgs_genoid)
## Joining, by = "IID"

1.7 Filter for EUR only

rosmap_pop <- read.delim("~/shared-gandalm/GenomicDatasets/ROSMAP/ROSMAP_WGS/qc_check/pc_and_population.txt", header = T, as.is = T)
libd_pop <- read.delim("~/shared-gandalm/brain_CTP/Data/genotyping/Jaffe2018/merge_arrays/pc_and_population_Illumina_1M_Illumina_h650.txt", header = T, as.is = T)
asdbrain_pop <- read.delim("~/shared-gandalm/GenomicDatasets/PsychENCODE/Genotypes/IndividualStudies/UCLA_ASD_synapse/grm/pc_and_population.txt", header = T, as.is = T)
asdbrain_pop$IID <- gsub("/", "_", asdbrain_pop$IID)

pop <- rbind(rosmap_pop, libd_pop, asdbrain_pop)
pop_ctp <- pop %>% filter(IID %in% pgs_pheno$genoid)
pop_eur <- pop %>% filter(population == "EUR") %>% mutate(genoid = IID)
pop_ctp_eur <- pop %>% filter(IID %in% pop_ctp$IID) %>% filter(IID %in% pop_eur$IID)

write.table(pop, "~/shared-gandalm/brain_CTP/Data/genotyping/megaanalysis/pc_and_population_ROSMAP_LIBD_ASDbrain.txt", col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pop_eur, "~/shared-gandalm/brain_CTP/Data/genotyping/megaanalysis/pc_and_population_ROSMAP_LIBD_ASDbrain_EUR.txt", col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pop_ctp_eur, "~/shared-gandalm/brain_CTP/Data/genotyping/megaanalysis/pc_and_population_ROSMAP_LIBD_ASDbrain_brainCTP_EUR.txt", col.names = T, row.names = F, quote = F, sep = "\t")

pgs_pheno_eur0 <- pgs_pheno %>% filter(genoid %in% pop_eur$genoid)

Write tables

ctp_pc <- readRDS("~/shared-gandalm/brain_CTP/Data/integration/ctp_differences/ctp_noclr_pc_ROSMAP_ASDbrain_LIBD.rds")
tmp <- inner_join(pheno, ctp_pc[,c(1, grep("Comp", colnames(ctp_pc)))], by = "IID")
pgs_pheno_ctp_pc <- inner_join(tmp, pgs_genoid, by = "IID")

pgs_pheno_ctp_pc_pop <- inner_join(pgs_pheno_ctp_pc, data.frame(genoid = pop_ctp$IID, population = pop_ctp$population), by = "genoid")

write.table(pgs_pheno_ctp_pc_pop, paste(out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_raw_allancestry.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")

Counts of ancestral groups

pop %>% group_by(str, population) %>% dplyr::summarise(n=n()) %>% pivot_wider(names_from = population, values_from = n) %>% mutate(total = sum(AFR, EUR, other, EAS, SAS, na.rm = T))
## `summarise()` has grouped output by 'str'. You can override using the `.groups`
## argument.
## # A tibble: 4 × 7
## # Groups:   str [4]
##   str                  AFR   EUR other   SAS   EAS total
##   <chr>              <int> <int> <int> <int> <int> <int>
## 1 LIBD_Illumina_1M     152   157    20    NA    NA   329
## 2 LIBD_Illumina_h650    64    63     5     1    NA   133
## 3 ROSMAP_WGS             1  1132     2    NA    NA  1135
## 4 UCLA_ASD               8    88     5     2     2   105
pop_ctp %>% group_by(str, population) %>% dplyr::summarise(n=n()) %>% pivot_wider(names_from = population, values_from = n) %>% mutate(total = sum(AFR, EUR, other, EAS, SAS, na.rm = T))
## `summarise()` has grouped output by 'str'. You can override using the `.groups`
## argument.
## # A tibble: 4 × 7
## # Groups:   str [4]
##   str                  AFR   EUR other   SAS   EAS total
##   <chr>              <int> <int> <int> <int> <int> <int>
## 1 LIBD_Illumina_1M     122   152    19    NA    NA   293
## 2 LIBD_Illumina_h650    57    61     5     1    NA   124
## 3 ROSMAP_WGS            NA   633    NA    NA    NA   633
## 4 UCLA_ASD               2    44     4     2     1    53
pop_ctp_eur %>% group_by(str, population) %>% dplyr::summarise(n=n())
## `summarise()` has grouped output by 'str'. You can override using the `.groups`
## argument.
## # A tibble: 4 × 3
## # Groups:   str [4]
##   str                population     n
##   <chr>              <chr>      <int>
## 1 LIBD_Illumina_1M   EUR          152
## 2 LIBD_Illumina_h650 EUR           61
## 3 ROSMAP_WGS         EUR          633
## 4 UCLA_ASD           EUR           44

1.8 Add geno PCs

  • found that there were some outliers in PC1/2/3 of the EUR subset
  • further analysis found that this was due to sample relatedness: n=6 were sample duplicates, then removing the n=7 people removed based on GRM rel<0.05 made the outliers disappear
  • as this is only n=7/>850 people, decided to remove these relatives
  • this is desirable as then the PCs capture population stratification, rather than just close relatives
genopc_eur <- read.table("~/shared-gandalm/brain_CTP/Data/genotyping/megaanalysis/ROSMAP_ASDbrain_LIBD_imputed_MERGE_1_22_QC_EUR_brainCTP_ldprune_pca20.eigenvec", header = F, as.is = T)
genopc123_eur <- genopc_eur[,2:5]
colnames(genopc123_eur) <- c("genoid", "PC1", "PC2", "PC3")

pgs_pheno_eur <- inner_join(pgs_pheno_eur0, genopc123_eur, by = "genoid")

# Check ancestry clustering
ggplot(pgs_pheno_eur, aes(x = PC1, y = PC2, colour = study, alpha = 0.2)) + geom_point() +   scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous")[c(2,3,4)], name = "")

ggplot(pgs_pheno_eur, aes(x = PC1, y = PC3, colour = study, alpha = 0.2)) + geom_point() +  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous")[c(2,3,4)], name = "")

ggplot(pgs_pheno_eur, aes(x = PC2, y = PC3, colour = study, alpha = 0.2)) + geom_point() +  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous")[c(2,3,4)], name = "")

# Check outliers
# print("Outliers based on SD of PC > 4")
# pc1_sd <- sd(pgs_pheno_eur$PC1)
# pc2_sd <- sd(pgs_pheno_eur$PC2)
# pc3_sd <- sd(pgs_pheno_eur$PC3)
# 
# pgs_pheno_eur %>% filter(PC1/pc1_sd > 4) %>% datatable()
# pgs_pheno_eur %>% filter(PC2/pc2_sd > 4) %>% datatable()
# pgs_pheno_eur %>% filter(PC3/pc3_sd > 4) %>% datatable()
# 
# eur_outliers <- pgs_pheno_eur %>% filter((PC1/pc1_sd > 4) | (PC2/pc2_sd > 4) | (PC3/pc3_sd > 4))
# write.table(eur_outliers[,c("genoid", "genoid")], "~/shared-gandalm/brain_CTP/Data/genotyping/megaanalysis/ROSMAP_ASDbrain_LIBD_EUR_outliers_PC123sd4.id", col.names = T, row.names = F, quote = F, sep = "\t")

1.8.1 Check PCA, removing EUR outliers (which are driven by close relatives)

# Remove ROSMAP outliers based on PC2
# rosmap2_outliers <- pgs_pheno_eur %>% filter(PC3 < -0.5)
# write.table(rosmap2_outliers[,c("genoid", "genoid")], "~/shared-gandalm/brain_CTP/Data/genotyping/megaanalysis/ROSMAP_ASDbrain_LIBD_EUR_outliers_PC123_PC3ROSMAP2.id", col.names = F, row.names = F, quote = F, sep = "\t")

# Exclude the following specimenID
# SM-CTEIJ
# SM-CTEMN
# SM-CTEI8
# SM-CTEN3
# SM-CTED9
# SM-CTEE2

# The below PCA still includes relatives. It shows taht keeping relatives means that the PCs capture family structure rather than population structure
# genopc_eur <- read.table("~/shared-gandalm/brain_CTP/Data/genotyping/megaanalysis/ROSMAP_ASDbrain_LIBD_imputed_MERGE_1_22_QC_EUR_brainCTP_ldprune_exclROSMAPoutlier_pca20.eigenvec", header = F, as.is = T)

# This PCA removes all relatives (rel < 0.05)
genopc_eur <- read.table("~/shared-gandalm/brain_CTP/Data/genotyping/megaanalysis/ROSMAP_ASDbrain_LIBD_imputed_MERGE_1_22_QC_EUR_brainCTP_rel0.05_ldprune_pca20.eigenvec", header = F, as.is = T)
genopc123_eur <- genopc_eur[,2:5]
colnames(genopc123_eur) <- c("genoid", "PC1", "PC2", "PC3")

pgs_pheno_eur <- inner_join(pgs_pheno_eur0, genopc123_eur, by = "genoid")

# Check ancestry clustering
ggplot(pgs_pheno_eur, aes(x = PC1, y = PC2, colour = study, alpha = 0.2)) + geom_point() +   scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous")[c(2,3,4)], name = "")

ggplot(pgs_pheno_eur, aes(x = PC1, y = PC3, colour = study, alpha = 0.2)) + geom_point() +  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous")[c(2,3,4)], name = "")

ggplot(pgs_pheno_eur, aes(x = PC2, y = PC3, colour = study, alpha = 0.2)) + geom_point() +  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous")[c(2,3,4)], name = "")

1.9 Standardise to control groups

asd_pgs_ctl_mean <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(asd_pgs_raw) %>% mean()
asd_pgs_ctl_sd <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(asd_pgs_raw) %>% sd()
azd_pgs_ctl_mean <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(azd_pgs_raw) %>% mean()
azd_pgs_ctl_sd <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(azd_pgs_raw) %>% sd()
scz_pgs_ctl_mean <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(scz_pgs_raw) %>% mean()
scz_pgs_ctl_sd <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(scz_pgs_raw) %>% sd()
height_pgs_ctl_mean <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(height_pgs_raw) %>% mean()
height_pgs_ctl_sd <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(height_pgs_raw) %>% sd()

pgs_pheno_eur <- pgs_pheno_eur %>% 
  mutate(ASD_PGS = (asd_pgs_raw - asd_pgs_ctl_mean)/asd_pgs_ctl_sd) %>% 
  mutate(AZD_PGS = (azd_pgs_raw - azd_pgs_ctl_mean)/azd_pgs_ctl_sd) %>% 
  mutate(SCZ_PGS = (scz_pgs_raw - scz_pgs_ctl_mean)/scz_pgs_ctl_sd) %>%
  mutate(Height_PGS = (height_pgs_raw - height_pgs_ctl_mean)/height_pgs_ctl_sd)

if (pgs_trait_all == TRUE) {
  mdd_pgs_ctl_mean <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(mdd_pgs_raw) %>% mean()
  mdd_pgs_ctl_sd <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(mdd_pgs_raw) %>% sd()
  eduyears_pgs_ctl_mean <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(eduyears_pgs_raw) %>% mean()
  eduyears_pgs_ctl_sd <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(eduyears_pgs_raw) %>% sd()
#  chronotype_pgs_ctl_mean <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(chronotype_pgs_raw) %>% mean()
#  chronotype_pgs_ctl_sd <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(chronotype_pgs_raw) %>% sd()
#  iq_pgs_ctl_mean <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(iq_pgs_raw) %>% mean()
#  iq_pgs_ctl_sd <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(iq_pgs_raw) %>% sd()

  pgs_pheno_eur <- pgs_pheno_eur %>% 
  mutate(MDD_PGS = (mdd_pgs_raw - mdd_pgs_ctl_mean)/mdd_pgs_ctl_sd) %>%
  mutate(EduYears_PGS = (eduyears_pgs_raw - eduyears_pgs_ctl_mean)/eduyears_pgs_ctl_sd)
#  mutate(Chronotype_PGS = (chronotype_pgs_raw - chronotype_pgs_ctl_mean)/chronotype_pgs_ctl_sd) %>%
#  mutate(IQ_PGS = (iq_pgs_raw - iq_pgs_ctl_mean)/iq_pgs_ctl_sd)
}
  
MatchDx <- match(pgs_pheno_eur$dx, c("CTL", "ASD", "SCZ", "AZD"))
pgs_pheno_eur$dx <- fct_reorder(pgs_pheno_eur$dx, MatchDx)

1.10 Write output table

write.table(pgs_pheno_eur, paste(out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")

pgs_pheno_eur %>% group_by(study, dx) %>% dplyr::summarise(n=n())
pgs_pheno_eur %>% group_by(study, dx) %>% filter(!is.na(age) | !is.na(sex)) %>% dplyr::summarise(n=n())

pheno_out_dir <- "~/shared-gandalm/brain_CTP/Data/integration/gcta/pheno/ROSMAP_LIBD_ASDbrain"

# Try and see if removing low clr-transformed values improves h2
write.table(pgs_pheno_eur[which(pgs_pheno_eur$Exc >= -2), c("genoid", "genoid", "Exc")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_neg2_Exc.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[which(pgs_pheno_eur$Inh >= -2), c("genoid", "genoid", "Inh")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_neg2_Inh.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[which(pgs_pheno_eur$Astro >= -2), c("genoid", "genoid", "Astro")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_neg2_Astro.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[which(pgs_pheno_eur$Endo >= -2), c("genoid", "genoid", "Endo")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_neg2_Endo.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[which(pgs_pheno_eur$Micro >= -2), c("genoid", "genoid", "Micro")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_neg2_Micro.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[which(pgs_pheno_eur$Oligo >= -2), c("genoid", "genoid", "Oligo")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_neg2_Oligo.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[which(pgs_pheno_eur$OPC >= -2), c("genoid", "genoid", "OPC")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_neg2_OPC.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")

# clr-transformed data
write.table(pgs_pheno_eur[,c("genoid", "genoid", "Exc")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_Exc.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[,c("genoid", "genoid", "Inh")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_Inh.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[,c("genoid", "genoid", "Astro")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_Astro.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[,c("genoid", "genoid", "Endo")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_Endo.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[,c("genoid", "genoid", "Micro")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_Micro.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[,c("genoid", "genoid", "Oligo")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_Oligo.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[,c("genoid", "genoid", "OPC")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_OPC.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")

write.table(pgs_pheno_eur[,c("genoid", "genoid", "age", "PC1", "PC2", "PC3")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.age_genoPC123.qcov", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[,c("genoid", "genoid", "age", "age2", "PC1", "PC2", "PC3")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.age_age2_genoPC123.qcov", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[,c("genoid", "genoid", "sex", "batch", "dx")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.sex_batch_dx.cov", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[,c("genoid", "genoid", "sex", "batch")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.sex_batch.cov", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")

# Write CTP PCs
ctp_pc <- readRDS("~/shared-gandalm/brain_CTP/Data/integration/ctp_differences/ctp_noclr_pc_ROSMAP_ASDbrain_LIBD.rds")
ctp_pc_genoid <- inner_join(genoid, ctp_pc, by = "IID")
write.table(ctp_pc_genoid[,c("genoid", "genoid", "Comp.1")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.ctp_pc1.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(ctp_pc_genoid[,c("genoid", "genoid", "Comp.2")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.ctp_pc2.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(ctp_pc_genoid[,c("genoid", "genoid", "Comp.3")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.ctp_pc3.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(ctp_pc_genoid[,c("genoid", "genoid", "Comp.4")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.ctp_pc4.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(ctp_pc_genoid[,c("genoid", "genoid", "Comp.5")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.ctp_pc5.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(ctp_pc_genoid[,c("genoid", "genoid", "Comp.6")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.ctp_pc6.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")


# CTL IDs
write.table(pgs_pheno_eur[which(pgs_pheno_eur$dx == "CTL"),c("genoid", "genoid")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.CTL.id", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")

2 Check distributions

2.1 Plot distribution

pgs_pheno_eur.long <- melt(pgs_pheno_eur, measure.vars = c(all_of(pgs_name)), variable.name = "Trait", value.name = "PGS")
## Warning in melt(pgs_pheno_eur, measure.vars = c(all_of(pgs_name)),
## variable.name = "Trait", : The melt generic in data.table has been passed
## a data.frame and will attempt to redirect to the relevant reshape2 method;
## please note that reshape2 is deprecated, and this redirection is now
## deprecated as well. To continue using melt methods from reshape2 while both
## libraries are attached, e.g. melt.list, you can prepend the namespace like
## reshape2::melt(pgs_pheno_eur). In the next version, this warning will become an
## error.
mean.df <- pgs_pheno_eur.long %>% group_by(study, dx, Trait) %>% dplyr::summarise(mean = mean(PGS))
## `summarise()` has grouped output by 'study', 'dx'. You can override using the
## `.groups` argument.
matchDx <- match(mean.df$dx, c("CTL", "ASD", "SCZ", "AZD"))

pgs_pheno_eur.long %>%
  mutate(MatchDx = match(dx, c("CTL", "ASD", "SCZ", "AZD"))) %>%
  mutate(MatchTrait = match(Trait, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS", "Height_PGS"))) %>%
  mutate(dx = fct_reorder(dx, MatchDx)) %>% 
  #mutate(MatchTrait = match(Trait, all_of(pgs_name))) %>%
  mutate(Trait = fct_reorder(Trait, MatchTrait)) %>% 
  ggplot(aes(x = PGS, fill = dx)) +
  geom_density(alpha = 0.5) +
  geom_vline(data=mean.df, aes(xintercept=mean, colour=dx), linetype="dashed", size=0.5) +
  facet_grid(Trait ~ study) +
  scale_fill_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw()

ggsave(paste(out_dir, "/pgs_EUR_studyxdx_distribution.svg", sep = ""), height = 10, width = 7)
ggsave(paste(out_dir, "/pgs_EUR_studyxdx_distribution.png", sep = ""), height = 10, width = 7)

mean.df2 <- pgs_pheno_eur.long %>% group_by(study, dx, Trait) %>% dplyr::summarise(mean = mean(PGS)) %>% filter((study == "ASDbrain" & Trait == "ASD_PGS") | (study == "LIBD" & Trait == "SCZ_PGS") | (study == "ROSMAP" & Trait == "AZD_PGS"))
## `summarise()` has grouped output by 'study', 'dx'. You can override using the
## `.groups` argument.
# Diagnosis-specific plot only
pgs_pheno_eur.long %>% filter((study == "ASDbrain" & Trait == "ASD_PGS") | (study == "LIBD" & Trait == "SCZ_PGS") | (study == "ROSMAP" & Trait == "AZD_PGS")) %>%
  mutate(MatchDx = match(dx, c("CTL", "ASD", "SCZ", "AZD"))) %>%
  mutate(dx = fct_reorder(dx, MatchDx)) %>% 
  mutate(MatchTrait = match(Trait, c("ASD_PGS", "SCZ_PGS", "AZD_PGS"))) %>%
  mutate(Trait = fct_reorder(Trait, MatchTrait)) %>% 
  ggplot(aes(x = PGS, fill = dx)) +
  geom_density(alpha = 0.5) +
  geom_vline(data=mean.df2, aes(xintercept=mean, colour=dx), linetype="dashed", size=0.5) +
  facet_wrap( ~ study, ncol = 2) +
  scale_fill_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw()

ggsave(paste(out_dir, "/pgs_EUR_studyxdx_distribution_pgsdx.svg", sep = ""), height = 4, width = 4)
ggsave(paste(out_dir, "/pgs_EUR_studyxdx_distribution_pgsdx.png", sep = ""), height = 4, width = 4)

pgs_pheno_eur.long %>% filter((study == "ASDbrain" & Trait == "ASD_PGS") | (study == "LIBD" & Trait == "SCZ_PGS") | (study == "ROSMAP" & Trait == "AZD_PGS")) %>%
  mutate(MatchDx = match(dx, c("CTL", "ASD", "SCZ", "AZD"))) %>%
  mutate(dx = fct_reorder(dx, MatchDx)) %>% 
  mutate(MatchTrait = match(Trait, c("ASD_PGS", "SCZ_PGS", "AZD_PGS"))) %>%
  mutate(Trait = fct_reorder(Trait, MatchTrait)) %>% 
  ggplot(aes(x = PGS, fill = dx)) +
  geom_density(alpha = 0.5) +
  geom_vline(data=mean.df2, aes(xintercept=mean, colour=dx), linetype="dashed", size=0.5) +
  facet_wrap( ~ study, nrow = 1) +
  scale_fill_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() + theme(legend.position = "bottom")

ggsave(paste(out_dir, "/pgs_EUR_studyxdx_distribution_pgsdx_1row.svg", sep = ""), height = 3, width = 10)
ggsave(paste(out_dir, "/pgs_EUR_studyxdx_distribution_pgsdx_1row.png", sep = ""), height = 3, width = 10)

3 PGS-trait associations

3.1 Check PGS within-study

For target trait

summary(lm(ASD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ASDbrain")))
## 
## Call:
## lm(formula = ASD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == 
##     "ASDbrain"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.70680 -0.70067  0.03128  0.44351  1.59303 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.19978    0.20554   0.972    0.337
## dxASD       -0.03142    0.26239  -0.120    0.905
## 
## Residual standard error: 0.8475 on 42 degrees of freedom
## Multiple R-squared:  0.0003412,  Adjusted R-squared:  -0.02346 
## F-statistic: 0.01434 on 1 and 42 DF,  p-value: 0.9053
summary(lm(SCZ_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "LIBD")))
## 
## Call:
## lm(formula = SCZ_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == 
##     "LIBD"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3485 -0.6288 -0.0191  0.6558  3.0219 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.21371    0.09572   2.233   0.0266 *  
## dxSCZ        0.73259    0.14822   4.943 1.58e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.062 on 209 degrees of freedom
## Multiple R-squared:  0.1047, Adjusted R-squared:  0.1004 
## F-statistic: 24.43 on 1 and 209 DF,  p-value: 1.578e-06
summary(lm(AZD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ROSMAP")))
## 
## Call:
## lm(formula = AZD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == 
##     "ROSMAP"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4817 -0.7085 -0.1000  0.6425  3.9801 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.10067    0.05501  -1.830   0.0677 .  
## dxAZD        0.66672    0.08532   7.814 2.37e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.05 on 621 degrees of freedom
## Multiple R-squared:  0.08953,    Adjusted R-squared:  0.08806 
## F-statistic: 61.06 on 1 and 621 DF,  p-value: 2.37e-14

Height as a negative control

summary(lm(Height_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ASDbrain")))
## 
## Call:
## lm(formula = Height_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == 
##     "ASDbrain"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.30563 -0.59799  0.02801  0.60879  2.03531 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.04286    0.21901  -0.196    0.846
## dxASD       -0.42674    0.27959  -1.526    0.134
## 
## Residual standard error: 0.903 on 42 degrees of freedom
## Multiple R-squared:  0.05255,    Adjusted R-squared:  0.02999 
## F-statistic:  2.33 on 1 and 42 DF,  p-value: 0.1344
summary(lm(Height_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "LIBD")))
## 
## Call:
## lm(formula = Height_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == 
##     "LIBD"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.62773 -0.61812  0.01064  0.72509  2.46164 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.30750    0.08563  -3.591 0.000411 ***
## dxSCZ        0.28937    0.13260   2.182 0.030205 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9497 on 209 degrees of freedom
## Multiple R-squared:  0.02228,    Adjusted R-squared:  0.0176 
## F-statistic: 4.762 on 1 and 209 DF,  p-value: 0.0302
summary(lm(Height_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ROSMAP")))
## 
## Call:
## lm(formula = Height_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == 
##     "ROSMAP"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.6328 -0.5774  0.0599  0.7034  2.7155 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  0.10591    0.04882   2.170   0.0304 *
## dxAZD       -0.04362    0.07571  -0.576   0.5647  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9313 on 621 degrees of freedom
## Multiple R-squared:  0.0005343,  Adjusted R-squared:  -0.001075 
## F-statistic: 0.332 on 1 and 621 DF,  p-value: 0.5647

Other neuropsych traits

summary(lm(MDD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ASDbrain")))
## 
## Call:
## lm(formula = MDD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == 
##     "ASDbrain"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.64764 -0.38676 -0.06134  0.38921  1.68266 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   0.2321     0.1807   1.284    0.206
## dxASD        -0.2572     0.2307  -1.115    0.271
## 
## Residual standard error: 0.7452 on 42 degrees of freedom
## Multiple R-squared:  0.02874,    Adjusted R-squared:  0.005614 
## F-statistic: 1.243 on 1 and 42 DF,  p-value: 0.2713
summary(lm(MDD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "LIBD")))
## 
## Call:
## lm(formula = MDD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == 
##     "LIBD"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7104 -0.6581 -0.0124  0.7187  2.5275 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.14183    0.08848   1.603     0.11
## dxSCZ        0.16867    0.13701   1.231     0.22
## 
## Residual standard error: 0.9813 on 209 degrees of freedom
## Multiple R-squared:  0.007199,   Adjusted R-squared:  0.002449 
## F-statistic: 1.516 on 1 and 209 DF,  p-value: 0.2197
summary(lm(MDD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ROSMAP")))
## 
## Call:
## lm(formula = MDD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == 
##     "ROSMAP"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3406 -0.6688  0.0165  0.6718  2.7367 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.05877    0.05321  -1.104    0.270
## dxAZD        0.04199    0.08252   0.509    0.611
## 
## Residual standard error: 1.015 on 621 degrees of freedom
## Multiple R-squared:  0.0004167,  Adjusted R-squared:  -0.001193 
## F-statistic: 0.2589 on 1 and 621 DF,  p-value: 0.6111
summary(lm(EduYears_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ASDbrain")))
## 
## Call:
## lm(formula = EduYears_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == 
##     "ASDbrain"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.83154 -0.48557  0.03916  0.47274  1.99466 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  -0.4965     0.1804  -2.752  0.00872 **
## dxASD         0.6353     0.2304   2.758  0.00858 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.744 on 42 degrees of freedom
## Multiple R-squared:  0.1533, Adjusted R-squared:  0.1332 
## F-statistic: 7.605 on 1 and 42 DF,  p-value: 0.008578
summary(lm(EduYears_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "LIBD")))
## 
## Call:
## lm(formula = EduYears_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == 
##     "LIBD"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2447 -0.6284 -0.0321  0.6852  3.2895 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.38183    0.09529  -4.007 8.56e-05 ***
## dxSCZ        0.31194    0.14756   2.114   0.0357 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.057 on 209 degrees of freedom
## Multiple R-squared:  0.02094,    Adjusted R-squared:  0.01625 
## F-statistic: 4.469 on 1 and 209 DF,  p-value: 0.0357
summary(lm(EduYears_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ROSMAP")))
## 
## Call:
## lm(formula = EduYears_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == 
##     "ROSMAP"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8826 -0.6633 -0.0003  0.6951  2.6612 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   0.1522     0.0501   3.038  0.00248 **
## dxAZD         0.1034     0.0777   1.331  0.18362   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9558 on 621 degrees of freedom
## Multiple R-squared:  0.002845,   Adjusted R-squared:  0.00124 
## F-statistic: 1.772 on 1 and 621 DF,  p-value: 0.1836

For target trait + genoPC covariates

summary(lm(ASD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ASDbrain")))
## 
## Call:
## lm(formula = ASD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% 
##     filter(study == "ASDbrain"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.40854 -0.57909  0.02969  0.52221  1.58669 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   0.1899     0.2114   0.898    0.375
## dxASD         0.0173     0.2648   0.065    0.948
## PC1           1.8109     2.8351   0.639    0.527
## PC2           0.6257     7.3194   0.085    0.932
## PC3           7.7982     5.8407   1.335    0.190
## 
## Residual standard error: 0.8332 on 39 degrees of freedom
## Multiple R-squared:  0.1027, Adjusted R-squared:  0.01067 
## F-statistic: 1.116 on 4 and 39 DF,  p-value: 0.3629
summary(lm(SCZ_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "LIBD")))
## 
## Call:
## lm(formula = SCZ_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% 
##     filter(study == "LIBD"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2426 -0.6028  0.0017  0.6458  2.5156 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.2266     0.1023   2.215   0.0278 *  
## dxSCZ         0.7560     0.1451   5.211 4.55e-07 ***
## PC1           4.9724     2.1753   2.286   0.0233 *  
## PC2          -0.2507     3.1578  -0.079   0.9368    
## PC3           5.1390     3.0862   1.665   0.0974 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.037 on 206 degrees of freedom
## Multiple R-squared:  0.1585, Adjusted R-squared:  0.1422 
## F-statistic: 9.703 on 4 and 206 DF,  p-value: 3.289e-07
summary(lm(AZD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP")))
## 
## Call:
## lm(formula = AZD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% 
##     filter(study == "ROSMAP"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4298 -0.7420 -0.0958  0.6509  4.0119 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.10490    0.05526  -1.898   0.0581 .  
## dxAZD        0.67210    0.08559   7.853  1.8e-14 ***
## PC1         -1.25063    1.44256  -0.867   0.3863    
## PC2          1.45096    1.14018   1.273   0.2036    
## PC3         -1.32891    1.18778  -1.119   0.2637    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.049 on 618 degrees of freedom
## Multiple R-squared:  0.0944, Adjusted R-squared:  0.08853 
## F-statistic:  16.1 on 4 and 618 DF,  p-value: 1.491e-12

Height as a negative control + covariates

summary(lm(Height_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ASDbrain")))
## 
## Call:
## lm(formula = Height_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% 
##     filter(study == "ASDbrain"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.51454 -0.45931 -0.03165  0.55819  1.78388 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   0.08575    0.19458   0.441  0.66186   
## dxASD        -0.52923    0.24380  -2.171  0.03610 * 
## PC1          -9.20878    2.61013  -3.528  0.00109 **
## PC2         -11.45093    6.73856  -1.699  0.09722 . 
## PC3          -2.52388    5.37719  -0.469  0.64142   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7671 on 39 degrees of freedom
## Multiple R-squared:  0.3651, Adjusted R-squared:    0.3 
## F-statistic: 5.608 on 4 and 39 DF,  p-value: 0.001153
summary(lm(Height_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "LIBD")))
## 
## Call:
## lm(formula = Height_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% 
##     filter(study == "LIBD"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.47283 -0.58857 -0.00698  0.64378  2.41847 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.23945    0.08919  -2.685  0.00785 ** 
## dxSCZ        0.26948    0.12652   2.130  0.03436 *  
## PC1         -8.69344    1.89708  -4.583 7.95e-06 ***
## PC2         -0.89584    2.75385  -0.325  0.74528    
## PC3          0.37665    2.69144   0.140  0.88884    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.904 on 206 degrees of freedom
## Multiple R-squared:  0.1269, Adjusted R-squared:  0.1099 
## F-statistic: 7.484 on 4 and 206 DF,  p-value: 1.198e-05
summary(lm(Height_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP")))
## 
## Call:
## lm(formula = Height_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% 
##     filter(study == "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.38146 -0.58125  0.03068  0.59682  2.39668 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.07089    0.04330   1.637  0.10213    
## dxAZD        -0.05106    0.06707  -0.761  0.44676    
## PC1         -14.35108    1.13048 -12.695  < 2e-16 ***
## PC2           2.69660    0.89351   3.018  0.00265 ** 
## PC3          -4.74250    0.93081  -5.095 4.64e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8223 on 618 degrees of freedom
## Multiple R-squared:  0.2247, Adjusted R-squared:  0.2197 
## F-statistic: 44.77 on 4 and 618 DF,  p-value: < 2.2e-16

Check other trait-PGS combinations

summary(lm(SCZ_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ASDbrain")))
## 
## Call:
## lm(formula = SCZ_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% 
##     filter(study == "ASDbrain"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.94717 -0.44216  0.00504  0.37978  1.69613 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  -0.1297     0.1880  -0.690  0.49437   
## dxASD         0.4012     0.2355   1.703  0.09644 . 
## PC1           7.0216     2.5214   2.785  0.00822 **
## PC2           2.5921     6.5094   0.398  0.69265   
## PC3           2.4814     5.1944   0.478  0.63552   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.741 on 39 degrees of freedom
## Multiple R-squared:  0.2993, Adjusted R-squared:  0.2274 
## F-statistic: 4.164 on 4 and 39 DF,  p-value: 0.006652
summary(lm(AZD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ASDbrain")))
## 
## Call:
## lm(formula = AZD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% 
##     filter(study == "ASDbrain"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.90427 -0.61964 -0.05869  0.35414  2.05747 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   0.1270     0.2243   0.566    0.574
## dxASD         0.1972     0.2810   0.702    0.487
## PC1           3.9416     3.0082   1.310    0.198
## PC2           9.3476     7.7661   1.204    0.236
## PC3           1.1508     6.1972   0.186    0.854
## 
## Residual standard error: 0.8841 on 39 degrees of freedom
## Multiple R-squared:  0.07464,    Adjusted R-squared:  -0.02026 
## F-statistic: 0.7865 on 4 and 39 DF,  p-value: 0.541
summary(lm(ASD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "LIBD")))
## 
## Call:
## lm(formula = ASD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% 
##     filter(study == "LIBD"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.92865 -0.61006 -0.04222  0.75846  2.48111 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   0.1349     0.1035   1.303   0.1939  
## dxSCZ        -0.2050     0.1469  -1.396   0.1642  
## PC1           5.1967     2.2023   2.360   0.0192 *
## PC2          -1.5921     3.1969  -0.498   0.6190  
## PC3           2.2101     3.1244   0.707   0.4801  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.049 on 206 degrees of freedom
## Multiple R-squared:  0.05482,    Adjusted R-squared:  0.03647 
## F-statistic: 2.987 on 4 and 206 DF,  p-value: 0.01998
summary(lm(AZD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "LIBD")))
## 
## Call:
## lm(formula = AZD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% 
##     filter(study == "LIBD"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.08422 -0.80958 -0.03644  0.61038  2.87367 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   0.2938     0.1033   2.845  0.00489 **
## dxSCZ        -0.1357     0.1465  -0.927  0.35521   
## PC1           0.3239     2.1965   0.147  0.88292   
## PC2           1.3772     3.1885   0.432  0.66624   
## PC3           1.2873     3.1163   0.413  0.67996   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.047 on 206 degrees of freedom
## Multiple R-squared:  0.006619,   Adjusted R-squared:  -0.01267 
## F-statistic: 0.3432 on 4 and 206 DF,  p-value: 0.8486
summary(lm(ASD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP")))
## 
## Call:
## lm(formula = ASD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% 
##     filter(study == "ROSMAP"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7990 -0.6490 -0.0029  0.6411  2.5700 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.05740    0.05159  -1.113 0.266319    
## dxAZD       -0.05185    0.07991  -0.649 0.516712    
## PC1          4.85087    1.34693   3.601 0.000342 ***
## PC2          1.52963    1.06459   1.437 0.151274    
## PC3          1.15893    1.10904   1.045 0.296436    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9797 on 618 degrees of freedom
## Multiple R-squared:  0.027,  Adjusted R-squared:  0.02071 
## F-statistic: 4.288 on 4 and 618 DF,  p-value: 0.001981
summary(lm(SCZ_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP")))
## 
## Call:
## lm(formula = SCZ_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% 
##     filter(study == "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.59015 -0.61562 -0.01351  0.63657  2.65252 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.06766    0.05034  -1.344 0.179473    
## dxAZD        0.03916    0.07797   0.502 0.615701    
## PC1          4.44107    1.31425   3.379 0.000773 ***
## PC2         -2.44845    1.03876  -2.357 0.018729 *  
## PC3          6.81406    1.08212   6.297 5.76e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.956 on 618 degrees of freedom
## Multiple R-squared:  0.07866,    Adjusted R-squared:  0.07269 
## F-statistic: 13.19 on 4 and 618 DF,  p-value: 2.568e-10

Other neuropsych traits

summary(lm(MDD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ASDbrain")))
## 
## Call:
## lm(formula = MDD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% 
##     filter(study == "ASDbrain"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.51991 -0.48935 -0.01293  0.41645  1.72214 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.23680    0.19429   1.219    0.230
## dxASD       -0.24577    0.24343  -1.010    0.319
## PC1          0.07066    2.60622   0.027    0.979
## PC2         -1.15552    6.72847  -0.172    0.865
## PC3          3.55877    5.36913   0.663    0.511
## 
## Residual standard error: 0.7659 on 39 degrees of freedom
## Multiple R-squared:  0.04717,    Adjusted R-squared:  -0.05056 
## F-statistic: 0.4827 on 4 and 39 DF,  p-value: 0.7483
summary(lm(MDD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "LIBD")))
## 
## Call:
## lm(formula = MDD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% 
##     filter(study == "LIBD"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.54716 -0.59004 -0.05149  0.64961  2.46207 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  0.08548    0.09613   0.889   0.3749  
## dxSCZ        0.18649    0.13637   1.368   0.1729  
## PC1          1.49830    2.04471   0.733   0.4645  
## PC2         -6.16962    2.96816  -2.079   0.0389 *
## PC3         -0.94688    2.90089  -0.326   0.7444  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9743 on 206 degrees of freedom
## Multiple R-squared:  0.03535,    Adjusted R-squared:  0.01662 
## F-statistic: 1.887 on 4 and 206 DF,  p-value: 0.1139
summary(lm(MDD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP")))
## 
## Call:
## lm(formula = MDD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% 
##     filter(study == "ROSMAP"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3639 -0.6747  0.0236  0.6631  2.8146 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.06134    0.05241  -1.170   0.2423    
## dxAZD        0.01363    0.08117   0.168   0.8667    
## PC1          1.40250    1.36819   1.025   0.3057    
## PC2         -1.96213    1.08140  -1.814   0.0701 .  
## PC3          5.55605    1.12654   4.932 1.05e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9952 on 618 degrees of freedom
## Multiple R-squared:  0.04395,    Adjusted R-squared:  0.03777 
## F-statistic: 7.103 on 4 and 618 DF,  p-value: 1.354e-05
summary(lm(EduYears_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ASDbrain")))
## 
## Call:
## lm(formula = EduYears_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% 
##     filter(study == "ASDbrain"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.67080 -0.33700 -0.02351  0.40997  1.15930 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.6579     0.1652  -3.983 0.000288 ***
## dxASD         0.6056     0.2070   2.926 0.005699 ** 
## PC1           7.2264     2.2159   3.261 0.002308 ** 
## PC2          -2.0668     5.7208  -0.361 0.719838    
## PC3          -3.9728     4.5651  -0.870 0.389484    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6512 on 39 degrees of freedom
## Multiple R-squared:  0.3976, Adjusted R-squared:  0.3358 
## F-statistic: 6.436 on 4 and 39 DF,  p-value: 0.0004462
summary(lm(EduYears_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "LIBD")))
## 
## Call:
## lm(formula = EduYears_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% 
##     filter(study == "LIBD"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9466 -0.6697  0.0026  0.6319  3.2555 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.3707     0.1008  -3.678  0.00030 ***
## dxSCZ         0.3532     0.1430   2.470  0.01431 *  
## PC1           2.0413     2.1438   0.952  0.34212    
## PC2          -8.7283     3.1120  -2.805  0.00552 ** 
## PC3           7.2795     3.0415   2.393  0.01759 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.022 on 206 degrees of freedom
## Multiple R-squared:  0.09836,    Adjusted R-squared:  0.08086 
## F-statistic: 5.618 on 4 and 206 DF,  p-value: 0.0002599
summary(lm(EduYears_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP")))
## 
## Call:
## lm(formula = EduYears_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% 
##     filter(study == "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.80814 -0.64290  0.02276  0.68834  2.28083 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.16773    0.04928   3.404 0.000708 ***
## dxAZD        0.09505    0.07632   1.245 0.213480    
## PC1          2.68199    1.28642   2.085 0.037493 *  
## PC2         -4.99453    1.01676  -4.912 1.15e-06 ***
## PC3          1.96378    1.05921   1.854 0.064215 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9357 on 618 degrees of freedom
## Multiple R-squared:  0.04891,    Adjusted R-squared:  0.04275 
## F-statistic: 7.945 on 4 and 618 DF,  p-value: 3.003e-06

3.2 Check PGS as a mega-analysis

summary(lm(ASD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur))
## 
## Call:
## lm(formula = ASD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3.05213 -0.63461 -0.01263  0.66105  2.54391 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.002554   0.044111  -0.058    0.954    
## dxASD        0.094980   0.196814   0.483    0.630    
## dxSCZ       -0.072857   0.115052  -0.633    0.527    
## dxAZD       -0.103973   0.076414  -1.361    0.174    
## PC1          5.171833   0.999518   5.174 2.84e-07 ***
## PC2          1.031131   0.992188   1.039    0.299    
## PC3          1.120763   1.005392   1.115    0.265    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9898 on 871 degrees of freedom
## Multiple R-squared:  0.03684,    Adjusted R-squared:  0.03021 
## F-statistic: 5.553 on 6 and 871 DF,  p-value: 1.16e-05
summary(lm(SCZ_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur))
## 
## Call:
## lm(formula = SCZ_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2486 -0.6409 -0.0291  0.6373  2.7723 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.004881   0.043168   0.113    0.910    
## dxASD        0.285279   0.192609   1.481    0.139    
## dxSCZ        0.985448   0.112593   8.752  < 2e-16 ***
## dxAZD       -0.027260   0.074782  -0.365    0.716    
## PC1          4.798916   0.978162   4.906 1.11e-06 ***
## PC2         -2.417490   0.970989  -2.490    0.013 *  
## PC3          6.229686   0.983911   6.332 3.88e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9686 on 871 degrees of freedom
## Multiple R-squared:  0.1462, Adjusted R-squared:  0.1403 
## F-statistic: 24.86 on 6 and 871 DF,  p-value: < 2.2e-16
summary(lm(AZD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur))
## 
## Call:
## lm(formula = AZD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4786 -0.7476 -0.0937  0.6373  4.0187 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.002046   0.046638  -0.044    0.965    
## dxASD        0.281628   0.208089   1.353    0.176    
## dxSCZ        0.128455   0.121643   1.056    0.291    
## dxAZD        0.577285   0.080792   7.145  1.9e-12 ***
## PC1          0.511076   1.056779   0.484    0.629    
## PC2          1.122238   1.049030   1.070    0.285    
## PC3         -1.128055   1.062990  -1.061    0.289    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.046 on 871 degrees of freedom
## Multiple R-squared:  0.05737,    Adjusted R-squared:  0.05087 
## F-statistic: 8.835 on 6 and 871 DF,  p-value: 2.271e-09

3.2.1 Plot of effect sizes

Should I exclude the asd_exclude group, which are excluded for CTP balance purposes? Or fine for PGS purposes?

No covariates

# No covariates
tidy_df_nocov_asd_asdbrain <- parameters::model_parameters(model = lm(ASD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ASDbrain")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "ASD_PGS")
tidy_df_nocov_asd_libd <- parameters::model_parameters(model = lm(ASD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "LIBD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "ASD_PGS")
tidy_df_nocov_asd_rosmap <- parameters::model_parameters(model = lm(ASD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ROSMAP")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "ASD_PGS")
tidy_df_nocov_asd <- rbind(tidy_df_nocov_asd_asdbrain, tidy_df_nocov_asd_libd, tidy_df_nocov_asd_rosmap)

tidy_df_nocov_scz_asdbrain <- parameters::model_parameters(model = lm(SCZ_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ASDbrain")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "SCZ_PGS")
tidy_df_nocov_scz_libd <- parameters::model_parameters(model = lm(SCZ_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "LIBD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "SCZ_PGS")
tidy_df_nocov_scz_rosmap <- parameters::model_parameters(model = lm(SCZ_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ROSMAP")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "SCZ_PGS")
tidy_df_nocov_scz <- rbind(tidy_df_nocov_scz_asdbrain, tidy_df_nocov_scz_libd, tidy_df_nocov_scz_rosmap)

tidy_df_nocov_azd_asdbrain <- parameters::model_parameters(model = lm(AZD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ASDbrain")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "AZD_PGS")
tidy_df_nocov_azd_libd <- parameters::model_parameters(model = lm(AZD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "LIBD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "AZD_PGS")
tidy_df_nocov_azd_rosmap <- parameters::model_parameters(model = lm(AZD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ROSMAP")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "AZD_PGS")
tidy_df_nocov_azd <- rbind(tidy_df_nocov_azd_asdbrain, tidy_df_nocov_azd_libd, tidy_df_nocov_azd_rosmap)

tidy_df_nocov_height_asdbrain <- parameters::model_parameters(model = lm(Height_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ASDbrain")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "Height_PGS")
tidy_df_nocov_height_libd <- parameters::model_parameters(model = lm(Height_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "LIBD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "Height_PGS")
tidy_df_nocov_height_rosmap <- parameters::model_parameters(model = lm(Height_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ROSMAP")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "Height_PGS")
tidy_df_nocov_height <- rbind(tidy_df_nocov_height_asdbrain, tidy_df_nocov_height_libd, tidy_df_nocov_height_rosmap)

tidy_df_nocov_mdd_asdbrain <- parameters::model_parameters(model = lm(MDD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ASDbrain")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "MDD_PGS")
tidy_df_nocov_mdd_libd <- parameters::model_parameters(model = lm(MDD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "LIBD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "MDD_PGS")
tidy_df_nocov_mdd_rosmap <- parameters::model_parameters(model = lm(MDD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ROSMAP")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "MDD_PGS")
tidy_df_nocov_mdd <- rbind(tidy_df_nocov_mdd_asdbrain, tidy_df_nocov_mdd_libd, tidy_df_nocov_mdd_rosmap)


tidy_df_nocov_eduyears_asdbrain <- parameters::model_parameters(model = lm(EduYears_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ASDbrain")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "EduYears_PGS")
tidy_df_nocov_eduyears_libd <- parameters::model_parameters(model = lm(EduYears_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "LIBD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "EduYears_PGS")
tidy_df_nocov_eduyears_rosmap <- parameters::model_parameters(model = lm(EduYears_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ROSMAP")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "EduYears_PGS")
tidy_df_nocov_eduyears <- rbind(tidy_df_nocov_eduyears_asdbrain, tidy_df_nocov_eduyears_libd, tidy_df_nocov_eduyears_rosmap)

# rbind together
tidy_df_nocov_mega <- rbind(tidy_df_nocov_asd, tidy_df_nocov_scz, tidy_df_nocov_azd, tidy_df_nocov_height, tidy_df_nocov_mdd, tidy_df_nocov_eduyears) %>% filter(term != "(Intercept)")

# Add label
tidy_df_nocov_mega$lab <- ifelse(tidy_df_nocov_mega$p.value < 0.05, paste(
  "b=", ifelse(abs(tidy_df_nocov_mega$estimate) < 0.01, formatC(tidy_df_nocov_mega$estimate, format = "e", digits = 1), round(tidy_df_nocov_mega$estimate, 2)), ", ", 
  "p=", ifelse(tidy_df_nocov_mega$p.value < 0.01, formatC(tidy_df_nocov_mega$p.value, format = "e", digits = 1), round(tidy_df_nocov_mega$p.value, 2)), ", ", 
  "df=", tidy_df_nocov_mega$df.error, sep = ""), NA)

# Plot
tidy_df_nocov_mega %>%
  mutate(MatchTerm = match(term, c("dxASD", "dxSCZ", "dxAZD"))) %>%
  mutate(term = fct_reorder(term, MatchTerm)) %>% 
  mutate(MatchTrait = match(Trait, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS", "Height_PGS"))) %>%
  mutate(Trait = fct_reorder(Trait, MatchTrait)) %>% 
  ggplot(aes(x = fct_rev(term), y = estimate, colour = term, label = lab)) +
    geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
#    geom_pointrange(aes(ymin = maxy, ymax = miny), position = position_jitter()) +
    geom_hline(aes(yintercept = 0), linetype = "dotted", colour = "navy") +
    geom_label_repel(size = 3.5, nudge_x = 0.15, force = 8, colour = "black", min.segment.length = 0) +
    scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous")[c(2,3,4,1)], name = "") +
    xlab("diagnosis") +
    ylab("estimate") +
    coord_flip() +
    theme_bw() + theme(legend.position = "none") +
    facet_wrap(~ Trait, ncol = 3)
## Warning: Removed 13 rows containing missing values (geom_label_repel).

ggsave(paste(out_dir, "/pgs_EUR_lmdx_nocov.svg", sep = ""), height = 3, width = 7)
## Warning: Removed 13 rows containing missing values (geom_label_repel).
ggsave(paste(out_dir, "/pgs_EUR_lmdx_nocov.png", sep = ""), height = 3, width = 7)
## Warning: Removed 13 rows containing missing values (geom_label_repel).

Covariates

# Covariates
#tidy_df_cov_asd <- parameters::model_parameters(model = lm(ASD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "ASD_PGS")
#tidy_df_cov_scz <- parameters::model_parameters(model = lm(SCZ_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "SCZ_PGS")
#tidy_df_cov_azd <- parameters::model_parameters(model = lm(AZD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "AZD_PGS")
#tidy_df_cov_height <- parameters::model_parameters(model = lm(Height_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "Height_PGS")

# Don't fit jointly: do per study
tidy_df_cov_asd_asdbrain <- parameters::model_parameters(model = lm(ASD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ASDbrain") %>% filter(!IID %in% asd_exclude$IID)), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "ASD_PGS")
tidy_df_cov_scz_asdbrain <- parameters::model_parameters(model = lm(SCZ_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ASDbrain") %>% filter(!IID %in% asd_exclude$IID)), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "SCZ_PGS")
tidy_df_cov_azd_asdbrain <- parameters::model_parameters(model = lm(AZD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ASDbrain") %>% filter(!IID %in% asd_exclude$IID)), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "AZD_PGS")
tidy_df_cov_asd <- rbind(tidy_df_cov_asd_asdbrain, tidy_df_cov_scz_asdbrain, tidy_df_cov_azd_asdbrain)

tidy_df_cov_asd_libd <- parameters::model_parameters(model = lm(ASD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "LIBD") %>% filter(!IID %in% scz_exclude$IID)), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "ASD_PGS")
tidy_df_cov_scz_libd <- parameters::model_parameters(model = lm(SCZ_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "LIBD") %>% filter(!IID %in% scz_exclude$IID)), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "SCZ_PGS")
tidy_df_cov_azd_libd <- parameters::model_parameters(model = lm(AZD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "LIBD") %>% filter(!IID %in% scz_exclude$IID)), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "AZD_PGS")
tidy_df_cov_scz <- rbind(tidy_df_cov_asd_libd, tidy_df_cov_scz_libd, tidy_df_cov_azd_libd)

tidy_df_cov_asd_rosmap <- parameters::model_parameters(model = lm(ASD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "ASD_PGS")
tidy_df_cov_scz_rosmap <- parameters::model_parameters(model = lm(SCZ_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "SCZ_PGS")
tidy_df_cov_azd_rosmap <- parameters::model_parameters(model = lm(AZD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "AZD_PGS")
tidy_df_cov_azd <- rbind(tidy_df_cov_asd_rosmap, tidy_df_cov_scz_rosmap, tidy_df_cov_azd_rosmap)

tidy_df_cov_asd_height  <- parameters::model_parameters(model = lm(Height_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ASDbrain")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "Height_PGS")
tidy_df_cov_scz_height <- parameters::model_parameters(model = lm(Height_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "LIBD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "Height_PGS")
tidy_df_cov_azd_height <- parameters::model_parameters(model = lm(Height_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "Height_PGS")
tidy_df_cov_height <- rbind(tidy_df_cov_asd_height, tidy_df_cov_scz_height, tidy_df_cov_azd_height)

tidy_df_cov_asd_mdd  <- parameters::model_parameters(model = lm(MDD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ASDbrain")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "MDD_PGS")
tidy_df_cov_scz_mdd <- parameters::model_parameters(model = lm(MDD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "LIBD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "MDD_PGS")
tidy_df_cov_azd_mdd <- parameters::model_parameters(model = lm(MDD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "MDD_PGS")
tidy_df_cov_mdd <- rbind(tidy_df_cov_asd_mdd, tidy_df_cov_scz_mdd, tidy_df_cov_azd_mdd)

tidy_df_cov_asd_eduyears  <- parameters::model_parameters(model = lm(EduYears_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ASDbrain")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "EduYears_PGS")
tidy_df_cov_scz_eduyears <- parameters::model_parameters(model = lm(EduYears_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "LIBD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "EduYears_PGS")
tidy_df_cov_azd_eduyears <- parameters::model_parameters(model = lm(EduYears_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "EduYears_PGS")
tidy_df_cov_eduyears <- rbind(tidy_df_cov_asd_eduyears, tidy_df_cov_scz_eduyears, tidy_df_cov_azd_eduyears)

# rbind together
tidy_df_cov_mega <- rbind(tidy_df_cov_asd, tidy_df_cov_scz, tidy_df_cov_azd, tidy_df_cov_height, tidy_df_cov_mdd, tidy_df_cov_eduyears) %>% filter(term %in% c("dxASD", "dxSCZ", "dxAZD"))

# Add label
tidy_df_cov_mega$lab <- ifelse(tidy_df_cov_mega$p.value < 0.05, paste(
  "b=", ifelse(abs(tidy_df_cov_mega$estimate < 0.01), formatC(tidy_df_cov_mega$estimate, format = "e", digits = 1), round(tidy_df_cov_mega$estimate, 2)), ", ", 
  "p=", ifelse(tidy_df_cov_mega$p.value < 0.01, formatC(tidy_df_cov_mega$p.value, format = "e", digits = 1), round(tidy_df_cov_mega$p.value, 2)), ", ", 
  "df=", tidy_df_cov_mega$df.error, sep = ""), NA)

# Plot
tidy_df_cov_mega %>%
  mutate(diagnosis = "diagnosis") %>%
  mutate(MatchTerm = match(term, c("dxASD", "dxSCZ", "dxAZD"))) %>%
  mutate(term = fct_reorder(term, MatchTerm)) %>% 
  mutate(MatchTrait = match(Trait, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS", "Height_PGS"))) %>%
  mutate(Trait = fct_reorder(Trait, MatchTrait)) %>% 
  ggplot(aes(x = fct_rev(term), y = estimate, colour = term, label = lab)) +
    geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
#    geom_pointrange(aes(ymin = maxy, ymax = miny), position = position_jitter()) +
    geom_hline(aes(yintercept = 0), linetype = "dotted", colour = "navy") +
    geom_label_repel(size = 3.5, nudge_x = 0.15, force = 8, colour = "black", min.segment.length = 0) +
    scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous")[c(2,3,4)], name = "") +
    xlab("diagnosis") +
    ylab("estimate (adjusting for geno PC1-3)") +
    coord_flip() +
    theme_bw() + theme(legend.position = "none") +
    facet_grid(Trait ~ diagnosis)
## Warning: Removed 12 rows containing missing values (geom_label_repel).

ggsave(paste(out_dir, "/pgs_EUR_lmdx_cov_1col.svg", sep = ""), height = 10, width = 3)
## Warning: Removed 12 rows containing missing values (geom_label_repel).
ggsave(paste(out_dir, "/pgs_EUR_lmdx_cov_1col.png", sep = ""), height = 10, width = 3)
## Warning: Removed 12 rows containing missing values (geom_label_repel).
tidy_df_cov_mega %>%
  mutate(diagnosis = "diagnosis") %>%
  mutate(MatchTerm = match(term, c("dxASD", "dxSCZ", "dxAZD"))) %>%
  mutate(term = fct_reorder(term, MatchTerm)) %>% 
  mutate(MatchTrait = match(Trait, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS", "Height_PGS"))) %>%
  mutate(Trait = fct_reorder(Trait, MatchTrait)) %>% 
  ggplot(aes(x = fct_rev(term), y = estimate, colour = term, label = lab)) +
    geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
#    geom_pointrange(aes(ymin = maxy, ymax = miny), position = position_jitter()) +
    geom_hline(aes(yintercept = 0), linetype = "dotted", colour = "navy") +
    geom_label_repel(size = 3.5, nudge_x = 0.15, force = 8, colour = "black", min.segment.length = 0) +
    scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous")[c(2,3,4)], name = "") +
    xlab("diagnosis") +
    ylab("estimate (adjusting for geno PC1-3)") +
    coord_flip() +
    theme_bw() + theme(legend.position = "none") +
    facet_wrap(~ Trait, nrow = 2)
## Warning: Removed 12 rows containing missing values (geom_label_repel).

#    facet_grid(Trait ~ diagnosis)

ggsave(paste(out_dir, "/pgs_EUR_lmdx_cov_2row.svg", sep = ""), height = 6, width = 6)
## Warning: Removed 12 rows containing missing values (geom_label_repel).
ggsave(paste(out_dir, "/pgs_EUR_lmdx_cov_2row.png", sep = ""), height = 6, width = 6)
## Warning: Removed 12 rows containing missing values (geom_label_repel).
# Plot
tidy_df_cov_mega %>%
  mutate(diagnosis = "diagnosis") %>%
  mutate(MatchTerm = match(term, c("dxASD", "dxSCZ", "dxAZD"))) %>%
  mutate(term = fct_reorder(term, MatchTerm)) %>% 
  mutate(MatchTrait = match(Trait, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS", "Height_PGS"))) %>%
  mutate(Trait = fct_reorder(Trait, MatchTrait)) %>% 
  ggplot(aes(x = fct_rev(term), y = estimate, colour = term, label = lab)) +
    geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
#    geom_pointrange(aes(ymin = maxy, ymax = miny), position = position_jitter()) +
    geom_hline(aes(yintercept = 0), linetype = "dotted", colour = "navy") +
    geom_label_repel(size = 3.5, nudge_x = 0.15, force = 8, colour = "black", min.segment.length = 0) +
    scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous")[c(2,3,4)], name = "") +
    xlab("diagnosis") +
    ylab("estimate (adjusting for geno PC1-3)") +
    coord_flip() +
    theme_bw() + theme(legend.position = "none") +
    facet_wrap(~ Trait, nrow = 1)
## Warning: Removed 12 rows containing missing values (geom_label_repel).

#    facet_grid(Trait ~ diagnosis)

ggsave(paste(out_dir, "/pgs_EUR_lmdx_cov_1row.svg", sep = ""), height = 3, width = 10)
## Warning: Removed 12 rows containing missing values (geom_label_repel).
ggsave(paste(out_dir, "/pgs_EUR_lmdx_cov_1row.png", sep = ""), height = 3, width = 10)
## Warning: Removed 12 rows containing missing values (geom_label_repel).
# Plot
tidy_df_cov_mega %>% filter(Trait %in% c("ASD_PGS", "SCZ_PGS", "AZD_PGS")) %>%
  mutate(diagnosis = "diagnosis") %>%
  mutate(MatchTerm = match(term, c("dxASD", "dxSCZ", "dxAZD"))) %>%
  mutate(term = fct_reorder(term, MatchTerm)) %>% 
  mutate(MatchTrait = match(Trait, c("ASD_PGS", "SCZ_PGS", "AZD_PGS"))) %>%
  mutate(Trait = fct_reorder(Trait, MatchTrait)) %>% 
  ggplot(aes(x = fct_rev(term), y = estimate, colour = term, label = lab)) +
    geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
#    geom_pointrange(aes(ymin = maxy, ymax = miny), position = position_jitter()) +
    geom_hline(aes(yintercept = 0), linetype = "dotted", colour = "navy") +
    geom_label_repel(size = 3.5, nudge_x = 0.15, force = 8, colour = "black", min.segment.length = 0) +
    scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous")[c(2,3,4)], name = "") +
    theme_bw() + theme(legend.position = "none") +
    xlab("diagnosis") +
    ylab("estimate (adjusting for geno PC1-3)") +
    coord_flip() +
    facet_grid(Trait ~ diagnosis)
## Warning: Removed 7 rows containing missing values (geom_label_repel).

ggsave(paste(out_dir, "/pgs_EUR_lmdx_cov_asd_scz_azd.svg", sep = ""), height = 7, width = 3)
## Warning: Removed 7 rows containing missing values (geom_label_repel).
ggsave(paste(out_dir, "/pgs_EUR_lmdx_cov_asd_scz_azd.png", sep = ""), height = 7, width = 3)
## Warning: Removed 7 rows containing missing values (geom_label_repel).

4 CTP-PGS associations

#ctpn <- c("Exc", "Inh", "NonN_Astro_FGF3R", "NonN_Micro.Endo_TYROBP", "NonN_Oligo_MBP")
ctpn <- c("Exc", "Inh", "Astro", "Endo", "Micro", "Oligo", "OPC")

pgs_pheno_ctp_eur.long <- melt(pgs_pheno_eur.long, measure.vars = all_of(ctpn), variable.name = "celltype", value.name = "CTP")
## Warning in melt(pgs_pheno_eur.long, measure.vars = all_of(ctpn),
## variable.name = "celltype", : The melt generic in data.table has been
## passed a data.frame and will attempt to redirect to the relevant reshape2
## method; please note that reshape2 is deprecated, and this redirection is now
## deprecated as well. To continue using melt methods from reshape2 while both
## libraries are attached, e.g. melt.list, you can prepend the namespace like
## reshape2::melt(pgs_pheno_eur.long). In the next version, this warning will
## become an error.

4.1 Linear models

4.1.1 No covariates

tidy_df_nocov_asd <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ ASD_PGS, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom")

tidy_df_nocov_scz <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ SCZ_PGS, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom")

tidy_df_nocov_azd <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ AZD_PGS, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom")

tidy_df_nocov_mdd <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ MDD_PGS, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom")

tidy_df_nocov_eduyears <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ EduYears_PGS, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom")

# rbind together
tidy_df_nocov_mega <- rbind(tidy_df_nocov_asd, tidy_df_nocov_scz, tidy_df_nocov_azd, tidy_df_nocov_mdd, tidy_df_nocov_eduyears) %>% filter(term %in% c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS")) %>% mutate(celltype = response)

# Add label
tidy_df_nocov_mega$lab <- ifelse(tidy_df_nocov_mega$p.value < 0.05, paste(
  "b=", ifelse(abs(tidy_df_nocov_mega$estimate < 0.01), formatC(tidy_df_nocov_mega$estimate, format = "e", digits = 1), round(tidy_df_nocov_mega$estimate, 2)), ", ", 
  "p=", ifelse(tidy_df_nocov_mega$p.value < 0.01, formatC(tidy_df_nocov_mega$p.value, format = "e", digits = 1), round(tidy_df_nocov_mega$p.value, 2)), ", ", 
  "df=", tidy_df_nocov_mega$df.error, sep = ""), NA)

# Change cell-type names
tidy_df_nocov_mega$celltype <- tidy_df_nocov_mega$response

# Plot
tidy_df_nocov_mega %>%
  mutate(MatchTerm = match(term, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS"))) %>%
  mutate(term = fct_reorder(term, MatchTerm)) %>% 
  mutate(MatchCell = match(celltype, all_of(ctpn))) %>%
  mutate(celltype = fct_reorder(celltype, MatchCell)) %>% 
  ggplot(aes(x = fct_rev(celltype), y = estimate, colour = term, label = lab)) +
    geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
#    geom_pointrange(aes(ymin = maxy, ymax = miny), position = position_jitter()) +
    geom_hline(aes(yintercept = 0), linetype = "dotted", colour = "navy") +
    geom_label_repel(size = 3.5, nudge_x = 0.15, force = 8, colour = "black", min.segment.length = 0) +
    scale_colour_manual(values = wes_palette(7, name = "Zissou1", type = "continuous")[c(3,4,7,6,1)], name = "") +
    theme_bw() + theme(legend.position = "none") +
    xlab("cell-type") +
    ylab("estimate (clr-transformed, no covariates)") +
    coord_flip() +
    facet_wrap(~ term)
## Warning: Removed 18 rows containing missing values (geom_label_repel).

ggsave(paste(out_dir, "/pgs_EUR_lmCTPvPGS_nocov.svg", sep = ""), height = 7, width = 7)
## Warning: Removed 18 rows containing missing values (geom_label_repel).
ggsave(paste(out_dir, "/pgs_EUR_lmCTPvPGS_nocov.png", sep = ""), height = 7, width = 7)
## Warning: Removed 18 rows containing missing values (geom_label_repel).

4.1.2 Covariates: age, sex, batch, genoPC1-3 (no dx, ALL participants)

tidy_df_cov_asd <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ ASD_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "ASD_PGS")

tidy_df_cov_scz <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ SCZ_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "SCZ_PGS")

tidy_df_cov_azd <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ AZD_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "AZD_PGS")

tidy_df_cov_mdd <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ MDD_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "MDD_PGS")

tidy_df_cov_eduyears <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ EduYears_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "EduYears_PGS")

# rbind together
tidy_df_cov_mega_all <- rbind(tidy_df_cov_asd, tidy_df_cov_scz, tidy_df_cov_azd, tidy_df_cov_mdd, tidy_df_cov_eduyears)
datatable(tidy_df_cov_mega_all)
tidy_df_cov_mega <- tidy_df_cov_mega_all %>% filter(term %in% c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS")) %>% mutate(celltype = response)

# Add label
tidy_df_cov_mega$lab <- ifelse(tidy_df_cov_mega$p.value < 0.05, paste(
  "b=", ifelse(abs(tidy_df_cov_mega$estimate < 0.01), formatC(tidy_df_cov_mega$estimate, format = "e", digits = 1), round(tidy_df_cov_mega$estimate, 2)), ", ", 
  "p=", ifelse(tidy_df_cov_mega$p.value < 0.01, formatC(tidy_df_cov_mega$p.value, format = "e", digits = 1), round(tidy_df_cov_mega$p.value, 2)), ", ", 
  "df=", tidy_df_cov_mega$df.error, sep = ""), NA)

# Change cell-type names
tidy_df_cov_mega$celltype <- tidy_df_cov_mega$response

# Plot
tidy_df_cov_mega %>%
  mutate(MatchTerm = match(term, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS"))) %>%
  mutate(term = fct_reorder(term, MatchTerm)) %>% 
  mutate(MatchCell = match(celltype, all_of(ctpn))) %>%
  mutate(celltype = fct_reorder(celltype, MatchCell)) %>% 
  ggplot(aes(x = fct_rev(celltype), y = estimate, colour = term, label = lab)) +
    geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
#    geom_pointrange(aes(ymin = maxy, ymax = miny), position = position_jitter()) +
    geom_hline(aes(yintercept = 0), linetype = "dotted", colour = "navy") +
    geom_label_repel(size = 3.5, nudge_x = 0.15, force = 8, colour = "black", min.segment.length = 0) +
    scale_colour_manual(values = wes_palette(7, name = "Zissou1", type = "continuous")[c(3,4,7,6,1)], name = "") +
    theme_bw() + theme(legend.position = "none") +
    xlab("cell-type") +
    ylab("estimate (clr-transformed, adjusted for covariates)") +
    coord_flip() +
    facet_wrap(~ term)
## Warning: Removed 34 rows containing missing values (geom_label_repel).

ggsave(paste(out_dir, "/pgs_EUR_lmCTPvPGS_cov.svg", sep = ""), height = 7, width = 7)
## Warning: Removed 34 rows containing missing values (geom_label_repel).
ggsave(paste(out_dir, "/pgs_EUR_lmCTPvPGS_cov.png", sep = ""), height = 7, width = 7)
## Warning: Removed 34 rows containing missing values (geom_label_repel).

4.1.3 Covariates: age, sex, batch, genoPC1-3, CTL only

tidy_df_cov_asd_CTL <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ ASD_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(dx == "CTL")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "ASD_PGS")

tidy_df_cov_scz_CTL <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ SCZ_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(dx == "CTL")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "SCZ_PGS")

tidy_df_cov_azd_CTL <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ AZD_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(dx == "CTL")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "AZD_PGS")

tidy_df_cov_mdd_CTL <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ MDD_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(dx == "CTL")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "MDD_PGS")

tidy_df_cov_eduyears_CTL <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ EduYears_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(dx == "CTL")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "EduYears_PGS")

# rbind together
tidy_df_cov_mega_CTL_all <- rbind(tidy_df_cov_asd_CTL, tidy_df_cov_scz_CTL, tidy_df_cov_azd_CTL, tidy_df_cov_mdd_CTL, tidy_df_cov_eduyears_CTL)
datatable(tidy_df_cov_mega_CTL_all)
tidy_df_cov_mega_CTL <- tidy_df_cov_mega_CTL_all %>% filter(term %in% c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS")) %>% mutate(celltype = response)

# Add label
tidy_df_cov_mega_CTL$lab <- ifelse(tidy_df_cov_mega_CTL$p.value < 0.05, paste(
  "b=", ifelse(abs(tidy_df_cov_mega_CTL$estimate < 0.01), formatC(tidy_df_cov_mega_CTL$estimate, format = "e", digits = 1), round(tidy_df_cov_mega_CTL$estimate, 2)), ", ", 
  "p=", ifelse(tidy_df_cov_mega_CTL$p.value < 0.01, formatC(tidy_df_cov_mega_CTL$p.value, format = "e", digits = 1), round(tidy_df_cov_mega_CTL$p.value, 2)), ", ", 
  "df=", tidy_df_cov_mega_CTL$df.error, sep = ""), NA)

# Change cell-type names
tidy_df_cov_mega_CTL$celltype <- tidy_df_cov_mega_CTL$response

# Plot
tidy_df_cov_mega_CTL %>%
  mutate(MatchTerm = match(term, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGs", "EduYears_PGS"))) %>%
  mutate(term = fct_reorder(term, MatchTerm)) %>% 
  mutate(MatchCell = match(celltype, all_of(ctpn))) %>%
  mutate(celltype = fct_reorder(celltype, MatchCell)) %>% 
  ggplot(aes(x = fct_rev(celltype), y = estimate, colour = term, label = lab)) +
    geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
#    geom_pointrange(aes(ymin = maxy, ymax = miny), position = position_jitter()) +
    geom_hline(aes(yintercept = 0), linetype = "dotted", colour = "navy") +
    geom_label_repel(size = 3.5, nudge_x = 0.15, force = 8, colour = "black", min.segment.length = 0) +
    scale_colour_manual(values = wes_palette(7, name = "Zissou1", type = "continuous")[c(3,4,7,6,1)], name = "") +
    theme_bw() + theme(legend.position = "none") +
    xlab("cell-type") +
    ylab("estimate (clr-transformed, adjusted for covariates) from nonDX") +
    coord_flip() +
    facet_wrap(~ term)
## Warning: Removed 33 rows containing missing values (geom_label_repel).

ggsave(paste(out_dir, "/pgs_EUR_lmCTPvPGS_cov_CTLonly.svg", sep = ""), height = 7, width = 7)
## Warning: Removed 33 rows containing missing values (geom_label_repel).
ggsave(paste(out_dir, "/pgs_EUR_lmCTPvPGS_cov_CTLonly.png", sep = ""), height = 7, width = 7)
## Warning: Removed 33 rows containing missing values (geom_label_repel).

4.1.4 Covariates: age, sex, batch, genoPC1-3, DX only

tidy_df_cov_asd_DX <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ ASD_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(dx == "ASD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "ASD_PGS")

tidy_df_cov_scz_DX <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ SCZ_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(dx == "SCZ")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "SCZ_PGS")

tidy_df_cov_azd_DX <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ AZD_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(dx == "AZD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "AZD_PGS")

tidy_df_cov_mdd_DX <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ MDD_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(dx == "AZD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "MDD_PGS")

tidy_df_cov_eduyears_DX <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ EduYears_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(dx == "AZD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "EduYears_PGS")

# rbind together
tidy_df_cov_mega_DX_all <- rbind(tidy_df_cov_asd_DX, tidy_df_cov_scz_DX, tidy_df_cov_azd_DX, tidy_df_cov_mdd_DX, tidy_df_cov_eduyears_DX)
datatable(tidy_df_cov_mega_DX_all)
tidy_df_cov_mega_DX <- tidy_df_cov_mega_DX_all %>% filter(term %in% c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS")) %>% mutate(celltype = response)

# Add label
tidy_df_cov_mega_DX$lab <- ifelse(tidy_df_cov_mega_DX$p.value < 0.05, paste(
  "b=", ifelse(abs(tidy_df_cov_mega_DX$estimate < 0.01), formatC(tidy_df_cov_mega_DX$estimate, format = "e", digits = 1), round(tidy_df_cov_mega_DX$estimate, 2)), ", ", 
  "p=", ifelse(tidy_df_cov_mega_DX$p.value < 0.01, formatC(tidy_df_cov_mega_DX$p.value, format = "e", digits = 1), round(tidy_df_cov_mega_DX$p.value, 2)), ", ", 
  "df=", tidy_df_cov_mega_DX$df.error, sep = ""), NA)

# Change cell-type names
tidy_df_cov_mega_DX$celltype <- tidy_df_cov_mega_DX$response

# Plot
tidy_df_cov_mega_DX %>%
  mutate(MatchTerm = match(term, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS"))) %>%
  mutate(term = fct_reorder(term, MatchTerm)) %>% 
  mutate(MatchCell = match(celltype, all_of(ctpn))) %>%
  mutate(celltype = fct_reorder(celltype, MatchCell)) %>% 
  ggplot(aes(x = fct_rev(celltype), y = estimate, colour = term, label = lab)) +
    geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
#    geom_pointrange(aes(ymin = maxy, ymax = miny), position = position_jitter()) +
    geom_hline(aes(yintercept = 0), linetype = "dotted", colour = "navy") +
    geom_label_repel(size = 3.5, nudge_x = 0.15, force = 8, colour = "black", min.segment.length = 0) +
    scale_colour_manual(values = wes_palette(7, name = "Zissou1", type = "continuous")[c(3,4,7,6,1)], name = "") +
    theme_bw() + theme(legend.position = "none") +
    xlab("cell-type") +
    ylab("estimate (clr-transformed, adjusted for covariates) from DX") +
    coord_flip() +
    facet_wrap(~ term)
## Warning: Removed 35 rows containing missing values (geom_label_repel).

ggsave(paste(out_dir, "/pgs_EUR_lmCTPvPGS_cov_DXonly.svg", sep = ""), height = 7, width = 7)
## Warning: Removed 35 rows containing missing values (geom_label_repel).
ggsave(paste(out_dir, "/pgs_EUR_lmCTPvPGS_cov_DXonly.png", sep = ""), height = 7, width = 7)
## Warning: Removed 35 rows containing missing values (geom_label_repel).

4.1.5 ROSMAP CTL sensitivity analysis for Endo

# No covariates
summary(lm(Endo ~ AZD_PGS, data = pgs_pheno_eur %>% filter(study == "ROSMAP") %>% filter(dx == "CTL")))
## 
## Call:
## lm(formula = Endo ~ AZD_PGS, data = pgs_pheno_eur %>% filter(study == 
##     "ROSMAP") %>% filter(dx == "CTL"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.308866 -0.058563 -0.002487  0.060605  0.255390 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.876899   0.004703 -186.45   <2e-16 ***
## AZD_PGS     -0.007174   0.004848   -1.48     0.14    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08924 on 362 degrees of freedom
## Multiple R-squared:  0.006011,   Adjusted R-squared:  0.003265 
## F-statistic: 2.189 on 1 and 362 DF,  p-value: 0.1399
# With covariates
summary(lm(Endo ~ AZD_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP") %>% filter(dx == "CTL")))
## 
## Call:
## lm(formula = Endo ~ AZD_PGS + age + sex + batch + PC1 + PC2 + 
##     PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP") %>% 
##     filter(dx == "CTL"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.28258 -0.05738  0.00511  0.05844  0.25824 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.693804   0.079594  -8.717  < 2e-16 ***
## AZD_PGS      -0.009206   0.004824  -1.908   0.0571 .  
## age          -0.001892   0.000921  -2.055   0.0406 *  
## sexM         -0.041800   0.009628  -4.341 1.85e-05 ***
## batchROSMAP1 -0.008749   0.009562  -0.915   0.3608    
## PC1          -0.043222   0.163718  -0.264   0.7919    
## PC2          -0.047736   0.129421  -0.369   0.7125    
## PC3          -0.019359   0.135507  -0.143   0.8865    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08744 on 355 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.06415,    Adjusted R-squared:  0.0457 
## F-statistic: 3.477 on 7 and 355 DF,  p-value: 0.001285

4.2 ANOVA to follow up significant PGS-CTP associations

endo_azd_pgs <- summary(aov(Endo ~ age + age2 + sex + batch + PC1 + PC2 + PC3 + AZD_PGS, data = pgs_pheno_eur %>% filter(dx == "CTL")))[[1]]
endo_azd_pgs
##              Df Sum Sq Mean Sq  F value    Pr(>F)    
## age           1 3.3936  3.3936 507.1352 < 2.2e-16 ***
## age2          1 0.0661  0.0661   9.8763  0.001776 ** 
## sex           1 0.1271  0.1271  18.9904 1.602e-05 ***
## batch         6 0.7313  0.1219  18.2137 < 2.2e-16 ***
## PC1           1 0.0012  0.0012   0.1801  0.671460    
## PC2           1 0.0021  0.0021   0.3201  0.571831    
## PC3           1 0.0000  0.0000   0.0032  0.954800    
## AZD_PGS       1 0.0302  0.0302   4.5191  0.034018 *  
## Residuals   489 3.2723  0.0067                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
endo_azd_pgs_batchfirst <- summary(aov(Endo ~ batch + sex + age + age2 + PC1 + PC2 + PC3 + AZD_PGS, data = pgs_pheno_eur %>% filter(dx == "CTL")))[[1]]
endo_azd_pgs_batchfirst
##              Df Sum Sq Mean Sq F value    Pr(>F)    
## batch         6 3.9247 0.65411 97.7480 < 2.2e-16 ***
## sex           1 0.1515 0.15150 22.6392 2.577e-06 ***
## age           1 0.2406 0.24062 35.9567 3.924e-09 ***
## age2          1 0.0013 0.00134  0.2001   0.65483    
## PC1           1 0.0012 0.00121  0.1801   0.67146    
## PC2           1 0.0021 0.00214  0.3201   0.57183    
## PC3           1 0.0000 0.00002  0.0032   0.95480    
## AZD_PGS       1 0.0302 0.03024  4.5191   0.03402 *  
## Residuals   489 3.2723 0.00669                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
endo_azd_pgs_ctldx <- summary(aov(Endo ~ age + age2 + sex + batch + PC1 + PC2 + PC3 + dx + dx:AZD_PGS, data = pgs_pheno_eur))[[1]]
endo_azd_pgs_ctldx
##              Df Sum Sq Mean Sq  F value    Pr(>F)    
## age           1 7.4353  7.4353 984.4870 < 2.2e-16 ***
## age2          1 0.1482  0.1482  19.6264 1.064e-05 ***
## sex           1 0.0833  0.0833  11.0240 0.0009375 ***
## batch         8 1.4194  0.1774  23.4919 < 2.2e-16 ***
## PC1           1 0.0054  0.0054   0.7215 0.3959043    
## PC2           1 0.0022  0.0022   0.2950 0.5871606    
## PC3           1 0.0098  0.0098   1.2973 0.2550303    
## dx            3 0.1883  0.0628   8.3121 1.877e-05 ***
## dx:AZD_PGS    4 0.0583  0.0146   1.9290 0.1036017    
## Residuals   855 6.4574  0.0076                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
endo_azd_pgs_addROSMAP <- summary(aov(Endo ~ age + age2 + sex + batch + PC1 + PC2 + PC3 + AZD_PGS, data = pgs_pheno_eur %>% filter(dx == "CTL" | study == "ROSMAP")))[[1]]
endo_azd_pgs_addROSMAP
##              Df Sum Sq Mean Sq  F value    Pr(>F)    
## age           1 4.5613  4.5613 572.3515 < 2.2e-16 ***
## age2          1 0.0889  0.0889  11.1597 0.0008774 ***
## sex           1 0.0927  0.0927  11.6297 0.0006839 ***
## batch         6 0.8069  0.1345  16.8753 < 2.2e-16 ***
## PC1           1 0.0019  0.0019   0.2323 0.6299788    
## PC2           1 0.0051  0.0051   0.6374 0.4249105    
## PC3           1 0.0061  0.0061   0.7707 0.3802758    
## AZD_PGS       1 0.1200  0.1200  15.0612 0.0001133 ***
## Residuals   748 5.9611  0.0080                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
endo_azd_pgs_onlyROSMAP <- summary(aov(Endo ~ age + age2 + sex + batch + PC1 + PC2 + PC3 + AZD_PGS, data = pgs_pheno_eur %>% filter(study == "ROSMAP")))[[1]]
endo_azd_pgs_onlyROSMAP
##              Df Sum Sq  Mean Sq F value    Pr(>F)    
## age           1 0.0500 0.050027  5.6893 0.0173716 *  
## age2          1 0.0078 0.007813  0.8885 0.3462521    
## sex           1 0.0931 0.093124 10.5904 0.0011993 ** 
## batch         1 0.0818 0.081811  9.3039 0.0023855 ** 
## PC1           1 0.0014 0.001417  0.1612 0.6882035    
## PC2           1 0.0042 0.004156  0.4726 0.4920314    
## PC3           1 0.0066 0.006576  0.7478 0.3875050    
## AZD_PGS       1 0.1250 0.125005 14.2160 0.0001788 ***
## Residuals   613 5.3903 0.008793                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#astro_scz_pgs <- summary(aov(Astro ~ age + age2 + sex + batch + PC1 + PC2 + PC3 + SCZ_PGS, data = pgs_pheno_eur %>% filter(dx == "CTL")))[[1]]
#astro_scz_pgs

#astro_scz_pgs_ctldx <- summary(aov(Astro ~ age + age2 + sex + batch + PC1 + PC2 + PC3 + dx + dx:SCZ_PGS, data = pgs_pheno_eur))[[1]]
#astro_scz_pgs_ctldx

oligo_scz_pgs <- summary(aov(Oligo ~ age + age2 + sex + batch + PC1 + PC2 + PC3 + SCZ_PGS, data = pgs_pheno_eur %>% filter(dx == "CTL")))[[1]]
oligo_scz_pgs
##              Df  Sum Sq Mean Sq F value    Pr(>F)    
## age           1  0.0335 0.03346  0.6610   0.41660    
## age2          1  1.5943 1.59431 31.4973  3.35e-08 ***
## sex           1  0.0348 0.03478  0.6870   0.40758    
## batch         6  8.4344 1.40574 27.7719 < 2.2e-16 ***
## PC1           1  0.0111 0.01108  0.2188   0.64014    
## PC2           1  0.0825 0.08253  1.6305   0.20224    
## PC3           1  0.0120 0.01200  0.2370   0.62661    
## SCZ_PGS       1  0.1387 0.13869  2.7400   0.09851 .  
## Residuals   489 24.7519 0.05062                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
oligo_scz_pgs_ctldx <- summary(aov(Oligo ~ age + age2 + sex + batch + PC1 + PC2 + PC3 + dx + dx:SCZ_PGS, data = pgs_pheno_eur))[[1]]
oligo_scz_pgs_ctldx
##              Df Sum Sq Mean Sq F value    Pr(>F)    
## age           1  0.003 0.00281  0.0575    0.8106    
## age2          1  3.081 3.08131 63.0021 6.468e-15 ***
## sex           1  0.024 0.02417  0.4942    0.4822    
## batch         8 15.532 1.94155 39.6980 < 2.2e-16 ***
## PC1           1  0.061 0.06060  1.2390    0.2660    
## PC2           1  0.080 0.07991  1.6340    0.2015    
## PC3           1  0.002 0.00231  0.0472    0.8280    
## dx            3  0.267 0.08894  1.8186    0.1422    
## dx:SCZ_PGS    4  0.143 0.03586  0.7333    0.5694    
## Residuals   855 41.816 0.04891                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Check using alternative method
mod0 <- lm(Endo ~ age + age2 + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(dx == "CTL"))
mod1 <- lm(Endo ~ age + age2 + sex + batch + PC1 + PC2 + PC3 + AZD_PGS, data = pgs_pheno_eur %>% filter(dx == "CTL"))
anova(mod0, mod1)
## Analysis of Variance Table
## 
## Model 1: Endo ~ age + age2 + sex + batch + PC1 + PC2 + PC3
## Model 2: Endo ~ age + age2 + sex + batch + PC1 + PC2 + PC3 + AZD_PGS
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
## 1    490 3.3025                              
## 2    489 3.2723  1  0.030241 4.5191 0.03402 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Summary tables
endo_azd_pgs_sumsq <- endo_azd_pgs$'Sum Sq'[1:(nrow(endo_azd_pgs)-1)]
endo_azd_pgs_sumsq <- endo_azd_pgs$'Sum Sq'
endo_azd_pgs_totalsumsq <- sum(endo_azd_pgs$'Sum Sq')
endo_azd_pgs_sumsq_summary <- data.frame(variable = rownames(endo_azd_pgs), SumSq = endo_azd_pgs_sumsq, var_pc = endo_azd_pgs_sumsq/endo_azd_pgs_totalsumsq*100, Cumulative = cumsum(endo_azd_pgs_sumsq), p = endo_azd_pgs$'Pr(>F)', CTP_PGS = "Endo/AZD_PGS (CTL)")
endo_azd_pgs_sumsq_summary
##      variable        SumSq       var_pc Cumulative            p
## 1 age         3.393648e+00 4.451263e+01   3.393648 1.412548e-77
## 2 age2        6.609031e-02 8.668706e-01   3.459738 1.775830e-03
## 3 sex         1.270802e-01 1.666842e+00   3.586818 1.602101e-05
## 4 batch       7.312927e-01 9.591968e+00   4.318111 4.003882e-19
## 5 PC1         1.205303e-03 1.580930e-02   4.319316 6.714599e-01
## 6 PC2         2.141781e-03 2.809257e-02   4.321458 5.718308e-01
## 7 PC3         2.152021e-05 2.822689e-04   4.321480 9.548002e-01
## 8 AZD_PGS     3.024110e-02 3.966560e-01   4.351721 3.401840e-02
## 9 Residuals   3.272291e+00 4.292085e+01   7.624011           NA
##              CTP_PGS
## 1 Endo/AZD_PGS (CTL)
## 2 Endo/AZD_PGS (CTL)
## 3 Endo/AZD_PGS (CTL)
## 4 Endo/AZD_PGS (CTL)
## 5 Endo/AZD_PGS (CTL)
## 6 Endo/AZD_PGS (CTL)
## 7 Endo/AZD_PGS (CTL)
## 8 Endo/AZD_PGS (CTL)
## 9 Endo/AZD_PGS (CTL)
#astro_scz_pgs_sumsq <- astro_scz_pgs$'Sum Sq'[1:(nrow(astro_scz_pgs)-1)]
#astro_scz_pgs_sumsq <- astro_scz_pgs$'Sum Sq'
#astro_scz_pgs_totalsumsq <- sum(astro_scz_pgs$'Sum Sq')
#astro_scz_pgs_sumsq_summary <- data.frame(variable = rownames(astro_scz_pgs), SumSq = astro_scz_pgs_sumsq, var_pc = astro_scz_pgs_sumsq/astro_scz_pgs_totalsumsq*100, Cumulative = cumsum(astro_scz_pgs_sumsq), p = astro_scz_pgs$'Pr(>F)', CTP_PGS = "Astro/SCZ_PGS (CTL)")
#astro_scz_pgs_sumsq_summary

oligo_scz_pgs_sumsq <- oligo_scz_pgs$'Sum Sq'[1:(nrow(oligo_scz_pgs)-1)]
oligo_scz_pgs_sumsq <- oligo_scz_pgs$'Sum Sq'
oligo_scz_pgs_totalsumsq <- sum(oligo_scz_pgs$'Sum Sq')
oligo_scz_pgs_sumsq_summary <- data.frame(variable = rownames(oligo_scz_pgs), SumSq = oligo_scz_pgs_sumsq, var_pc = oligo_scz_pgs_sumsq/oligo_scz_pgs_totalsumsq*100, Cumulative = cumsum(oligo_scz_pgs_sumsq), p = oligo_scz_pgs$'Pr(>F)', CTP_PGS = "Oligo/SCZ_PGS (CTL)")
oligo_scz_pgs_sumsq_summary
##      variable       SumSq      var_pc  Cumulative            p
## 1 age          0.03345882  0.09534283  0.03345882 4.165976e-01
## 2 age2         1.59431182  4.54308395  1.62777063 3.349931e-08
## 3 sex          0.03477507  0.09909358  1.66254570 4.075847e-01
## 4 batch        8.43443640 24.03441554 10.09698210 1.459263e-28
## 5 PC1          0.01107684  0.03156409 10.10805894 6.401376e-01
## 6 PC2          0.08253113  0.23517723 10.19059007 2.022412e-01
## 7 PC3          0.01199582  0.03418278 10.20258589 6.266065e-01
## 8 SCZ_PGS      0.13869187  0.39521051 10.34127775 9.850597e-02
## 9 Residuals   24.75188433 70.53192948 35.09316208           NA
##               CTP_PGS
## 1 Oligo/SCZ_PGS (CTL)
## 2 Oligo/SCZ_PGS (CTL)
## 3 Oligo/SCZ_PGS (CTL)
## 4 Oligo/SCZ_PGS (CTL)
## 5 Oligo/SCZ_PGS (CTL)
## 6 Oligo/SCZ_PGS (CTL)
## 7 Oligo/SCZ_PGS (CTL)
## 8 Oligo/SCZ_PGS (CTL)
## 9 Oligo/SCZ_PGS (CTL)
endo_azd_pgs_addROSMAP_sumsq <- endo_azd_pgs_addROSMAP$'Sum Sq'[1:(nrow(endo_azd_pgs_addROSMAP)-1)]
endo_azd_pgs_addROSMAP_sumsq <- endo_azd_pgs_addROSMAP$'Sum Sq'
endo_azd_pgs_addROSMAP_totalsumsq <- sum(endo_azd_pgs_addROSMAP$'Sum Sq')
endo_azd_pgs_addROSMAP_sumsq_summary <- data.frame(variable = rownames(endo_azd_pgs_addROSMAP), SumSq = endo_azd_pgs_addROSMAP_sumsq, var_pc = endo_azd_pgs_addROSMAP_sumsq/endo_azd_pgs_addROSMAP_totalsumsq*100, Cumulative = cumsum(endo_azd_pgs_addROSMAP_sumsq), p = endo_azd_pgs_addROSMAP$'Pr(>F)', CTP_PGS = "Endo/AZD_PGS (CTL+AZD)")
endo_azd_pgs_addROSMAP_sumsq_summary
##      variable       SumSq      var_pc Cumulative            p
## 1 age         4.561323380 39.17279366   4.561323 2.222843e-94
## 2 age2        0.088936947  0.76379339   4.650260 8.773950e-04
## 3 sex         0.092682144  0.79595727   4.742942 6.838984e-04
## 4 batch       0.806921161  6.92986520   5.549864 2.497876e-18
## 5 PC1         0.001851148  0.01589772   5.551715 6.299788e-01
## 6 PC2         0.005079652  0.04362422   5.556794 4.249105e-01
## 7 PC3         0.006142224  0.05274962   5.562937 3.802758e-01
## 8 AZD_PGS     0.120029204  1.03081471   5.682966 1.132667e-04
## 9 Residuals   5.961144641 51.19450421  11.644111           NA
##                  CTP_PGS
## 1 Endo/AZD_PGS (CTL+AZD)
## 2 Endo/AZD_PGS (CTL+AZD)
## 3 Endo/AZD_PGS (CTL+AZD)
## 4 Endo/AZD_PGS (CTL+AZD)
## 5 Endo/AZD_PGS (CTL+AZD)
## 6 Endo/AZD_PGS (CTL+AZD)
## 7 Endo/AZD_PGS (CTL+AZD)
## 8 Endo/AZD_PGS (CTL+AZD)
## 9 Endo/AZD_PGS (CTL+AZD)
endo_azd_pgs_onlyROSMAP_sumsq <- endo_azd_pgs_onlyROSMAP$'Sum Sq'[1:(nrow(endo_azd_pgs_onlyROSMAP)-1)]
endo_azd_pgs_onlyROSMAP_sumsq <- endo_azd_pgs_onlyROSMAP$'Sum Sq'
endo_azd_pgs_onlyROSMAP_totalsumsq <- sum(endo_azd_pgs_onlyROSMAP$'Sum Sq')
endo_azd_pgs_onlyROSMAP_sumsq_summary <- data.frame(variable = rownames(endo_azd_pgs_onlyROSMAP), SumSq = endo_azd_pgs_onlyROSMAP_sumsq, var_pc = endo_azd_pgs_onlyROSMAP_sumsq/endo_azd_pgs_onlyROSMAP_totalsumsq*100, Cumulative = cumsum(endo_azd_pgs_onlyROSMAP_sumsq), p = endo_azd_pgs_onlyROSMAP$'Pr(>F)', CTP_PGS = "Endo/AZD_PGS (ROSMAP)")
endo_azd_pgs_onlyROSMAP_sumsq_summary
##      variable       SumSq      var_pc Cumulative            p
## 1 age         0.050027458  0.86850146 0.05002746 0.0173715973
## 2 age2        0.007812928  0.13563629 0.05784039 0.3462521265
## 3 sex         0.093124296  1.61668390 0.15096468 0.0011993044
## 4 batch       0.081811265  1.42028409 0.23277595 0.0023854596
## 5 PC1         0.001417384  0.02460648 0.23419333 0.6882035473
## 6 PC2         0.004156124  0.07215237 0.23834945 0.4920313749
## 7 PC3         0.006575799  0.11415912 0.24492525 0.3875050180
## 8 AZD_PGS     0.125004815  2.17014550 0.36993007 0.0001787543
## 9 Residuals   5.390274230 93.57783077 5.76020430           NA
##                 CTP_PGS
## 1 Endo/AZD_PGS (ROSMAP)
## 2 Endo/AZD_PGS (ROSMAP)
## 3 Endo/AZD_PGS (ROSMAP)
## 4 Endo/AZD_PGS (ROSMAP)
## 5 Endo/AZD_PGS (ROSMAP)
## 6 Endo/AZD_PGS (ROSMAP)
## 7 Endo/AZD_PGS (ROSMAP)
## 8 Endo/AZD_PGS (ROSMAP)
## 9 Endo/AZD_PGS (ROSMAP)
#ctp_pgs_summary <- rbind(endo_azd_pgs_sumsq_summary, endo_azd_pgs_addROSMAP_sumsq_summary, endo_azd_pgs_onlyROSMAP_sumsq_summary, astro_scz_pgs_sumsq_summary, oligo_scz_pgs_sumsq_summary)
ctp_pgs_summary <- rbind(endo_azd_pgs_sumsq_summary, endo_azd_pgs_addROSMAP_sumsq_summary, endo_azd_pgs_onlyROSMAP_sumsq_summary, oligo_scz_pgs_sumsq_summary)

ctp_pgs_summary %>% 
  mutate(variable = gsub("[[:blank:]]", "", variable)) %>%
  filter(variable != "Residuals") %>%
  mutate(variable = gsub("SCZ_PGS", "PGS", variable)) %>%
  mutate(variable = gsub("AZD_PGS", "PGS", variable)) %>%
  mutate(MatchVariable = match(variable, unique(variable))) %>%
  mutate(MatchAnalysis = match(CTP_PGS, unique(CTP_PGS))) %>%
  mutate(variable = fct_reorder(variable, rev(MatchVariable))) %>%
  mutate(CTP_PGS = fct_reorder(CTP_PGS, rev(MatchAnalysis))) %>%
  ggplot(aes(x = CTP_PGS, y = var_pc, fill = variable)) + 
  geom_bar(position="stack", stat="identity") +
  scale_fill_brewer(palette = "Dark2") +
  theme_bw() +
  ylab("Variance % explained of clr-transformed CTP") + xlab("Association tested") +
  coord_flip()

ggsave(paste(out_dir, "/pgs_EUR_anova_varpc_cov_pgssigassoc.svg", sep = ""), height = 4, width = 7)
ggsave(paste(out_dir, "/pgs_EUR_anova_varpc_cov_pgssigassoc.png", sep = ""), height = 4, width = 7)

4.3 Scatterplots (no cov)

pgs_pheno_ctp_eur.long$celltypen <- pgs_pheno_ctp_eur.long$celltype

pgs_pheno_ctp_eur.long %>%
  filter(Trait != "Height_PGS") %>%
  mutate(MatchTrait = match(Trait, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS"))) %>%
  mutate(Trait = fct_reorder(Trait, MatchTrait)) %>% 
  mutate(MatchCell = match(celltypen, all_of(ctpn))) %>%
  mutate(celltypen = fct_reorder(celltypen, MatchCell)) %>% 
  ggplot(aes(x = PGS, y = CTP)) +
  geom_point(aes(colour = Trait)) +
  geom_smooth(method = "lm") +
  scale_colour_manual(values = wes_palette(7, name = "Zissou1", type = "continuous")[c(3,4,7,6,1)], name = "") +
  ylab("CTP (clr-transformed)") +
  theme_bw() + theme(legend.position = "none") +
  facet_grid(celltypen ~ Trait, scales = "free")
## `geom_smooth()` using formula 'y ~ x'

ggsave(paste(out_dir, "/pgs_EUR_scatterCTPvPGS_nocov_ALL.svg", sep = ""), height = 7, width = 5)
## `geom_smooth()` using formula 'y ~ x'
ggsave(paste(out_dir, "/pgs_EUR_scatterCTPvPGS_nocov_ALL.png", sep = ""), height = 7, width = 5)
## `geom_smooth()` using formula 'y ~ x'

4.4 Scatterplots (regress out batch effect, ALL participants)

batch_coef <- pgs_pheno_ctp_eur.long %>% group_by(celltype) %>% 
  do(model = tidy(lm(CTP ~ batch, data = .))) %>% unnest(model) %>%
  mutate(term = gsub("batch", "", term)) %>%
  dplyr::rename(batch = term) %>%
  dplyr::select(c("celltype", "batch", "estimate"))

# Modify CTP in light of batch adjustment
# - subtraction as it's relative to a given batch (eg. coefficient +1, means that batch has +1 unit higher on average --> need to push back down)
pgs_pheno_ctp_eur.long %>%
  filter(Trait != "Height_PGS") %>%
  left_join(., batch_coef, by = c("celltype", "batch")) %>% 
  mutate(CTP_batchadjust = ifelse(is.na(estimate), CTP, CTP - estimate)) %>% 
  mutate(MatchTrait = match(Trait, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS"))) %>%
  mutate(Trait = fct_reorder(Trait, MatchTrait)) %>% 
  mutate(MatchCell = match(celltypen, all_of(ctpn))) %>%
  mutate(celltypen = fct_reorder(celltypen, MatchCell)) %>% 
  ggplot(aes(x = PGS, y = CTP_batchadjust)) +
  geom_point(aes(colour = Trait)) +
  geom_smooth(method = "lm") +
  scale_colour_manual(values = wes_palette(7, name = "Zissou1", type = "continuous")[c(3,4,7,6,1)], name = "") +
  ylab("CTP (clr-transformed, adjusted for batch)") +
  theme_bw() + theme(legend.position = "none") +
  facet_grid(celltypen ~ Trait, scales = "free")
## `geom_smooth()` using formula 'y ~ x'

ggsave(paste(out_dir, "/pgs_EUR_scatterCTPvPGS_adjbatch_ALL.svg", sep = ""), height = 7, width = 5)
## `geom_smooth()` using formula 'y ~ x'
ggsave(paste(out_dir, "/pgs_EUR_scatterCTPvPGS_adjbatch_ALL.png", sep = ""), height = 7, width = 5)
## `geom_smooth()` using formula 'y ~ x'

4.5 Scatterplots (regress out batch effect, CTL participants)

batch_coef <- pgs_pheno_ctp_eur.long %>% group_by(celltype) %>% 
  do(model = tidy(lm(CTP ~ batch, data = .))) %>% unnest(model) %>%
  mutate(term = gsub("batch", "", term)) %>%
  dplyr::rename(batch = term) %>%
  dplyr::select(c("celltype", "batch", "estimate"))

# Modify CTP in light of batch adjustment
# - subtraction as it's relative to a given batch (eg. coefficient +1, means that batch has +1 unit higher on average --> need to push back down)
pgs_pheno_ctp_eur.long %>% filter(dx == "CTL") %>%
  filter(Trait != "Height_PGS") %>%
  left_join(., batch_coef, by = c("celltype", "batch")) %>% 
  mutate(CTP_batchadjust = ifelse(is.na(estimate), CTP, CTP - estimate)) %>% 
  mutate(MatchTrait = match(Trait, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS"))) %>%
  mutate(Trait = fct_reorder(Trait, MatchTrait)) %>% 
  mutate(MatchCell = match(celltypen, all_of(ctpn))) %>%
  mutate(celltypen = fct_reorder(celltypen, MatchCell)) %>% 
  ggplot(aes(x = PGS, y = CTP_batchadjust)) +
  geom_point(aes(colour = Trait)) +
  geom_smooth(method = "lm") +
  scale_colour_manual(values = wes_palette(7, name = "Zissou1", type = "continuous")[c(3,4,7,6,1)], name = "") +
  ylab("CTP (clr-transformed, adjusted for batch, CTL only)") +
  theme_bw() + theme(legend.position = "none") +
  facet_grid(celltypen ~ Trait, scales = "free")
## `geom_smooth()` using formula 'y ~ x'

ggsave(paste(out_dir, "/pgs_EUR_scatterCTPvPGS_adjbatch_CTLonly.svg", sep = ""), height = 7, width = 5)
## `geom_smooth()` using formula 'y ~ x'
ggsave(paste(out_dir, "/pgs_EUR_scatterCTPvPGS_adjbatch_CTLonly.png", sep = ""), height = 7, width = 5)
## `geom_smooth()` using formula 'y ~ x'
pgs_pheno_ctp_eur.long %>% filter(dx == "CTL") %>%
  filter(Trait != "Height_PGS") %>%
  left_join(., batch_coef, by = c("celltype", "batch")) %>% 
  mutate(CTP_batchadjust = ifelse(is.na(estimate), CTP, CTP - estimate)) %>% 
  mutate(MatchTrait = match(Trait, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS"))) %>%
  mutate(Trait = fct_reorder(Trait, MatchTrait)) %>% 
  mutate(MatchCell = match(celltypen, all_of(ctpn))) %>%
  mutate(celltypen = fct_reorder(celltypen, MatchCell)) %>% 
  ggplot(aes(x = PGS, y = CTP_batchadjust)) +
  geom_point(aes(colour = Trait)) +
  geom_smooth(method = "lm") +
  scale_colour_manual(values = wes_palette(7, name = "Zissou1", type = "continuous")[c(3,4,7,6,1)], name = "") +
  ylab("CTP (clr-transformed, adjusted for batch, CTL only)") +
  coord_cartesian(ylim=c(-2, 2)) +
  theme_bw() + theme(legend.position = "none") +
  facet_grid(celltypen ~ Trait)
## `geom_smooth()` using formula 'y ~ x'

ggsave(paste(out_dir, "/pgs_EUR_scatterCTPvPGS_adjbatch_neg2to2_CTLonly.svg", sep = ""), height = 7, width = 5)
## `geom_smooth()` using formula 'y ~ x'
ggsave(paste(out_dir, "/pgs_EUR_scatterCTPvPGS_adjbatch_neg2to2_CTLonly.png", sep = ""), height = 7, width = 5)
## `geom_smooth()` using formula 'y ~ x'

4.6 All participants

4.6.1 Correlation

pgs_pheno_ctp.cor <- pgs_pheno_ctp_eur.long %>% group_by(Trait, celltype) %>% dplyr::summarise(r_pearson = cor.test(PGS, CTP)$estimate, p = cor.test(PGS, CTP)$p.value)
## `summarise()` has grouped output by 'Trait'. You can override using the
## `.groups` argument.
pgs_pheno_ctp.cor %>% dplyr::select(c("Trait", "celltype", "r_pearson")) %>% spread(Trait, r_pearson)
## # A tibble: 7 × 7
##   celltype  ASD_PGS SCZ_PGS  AZD_PGS EduYears_PGS MDD_PGS Height_PGS
##   <fct>       <dbl>   <dbl>    <dbl>        <dbl>   <dbl>      <dbl>
## 1 Exc      -0.0574  -0.150  -0.0226       0.181   -0.0837     0.120 
## 2 Inh      -0.0571  -0.124   0.00608      0.119   -0.0314     0.101 
## 3 Astro    -0.0600  -0.130  -0.0568       0.112   -0.0121     0.125 
## 4 Endo      0.0564   0.103  -0.0774      -0.185    0.0893    -0.0808
## 5 Micro     0.0561   0.128   0.00572     -0.180    0.0948    -0.108 
## 6 Oligo    -0.00271  0.0796  0.00259     -0.00266 -0.0157     0.0184
## 7 OPC       0.0479   0.0755  0.0613      -0.0808   0.0116    -0.117
pgs_pheno_ctp.cor %>% dplyr::select(c("Trait", "celltype", "p")) %>% spread(Trait, p)
## # A tibble: 7 × 7
##   celltype ASD_PGS    SCZ_PGS AZD_PGS EduYears_PGS MDD_PGS Height_PGS
##   <fct>      <dbl>      <dbl>   <dbl>        <dbl>   <dbl>      <dbl>
## 1 Exc       0.0889 0.00000802  0.503  0.0000000651 0.0131    0.000352
## 2 Inh       0.0908 0.000229    0.857  0.000427     0.353     0.00274 
## 3 Astro     0.0756 0.000110    0.0926 0.000863     0.719     0.000200
## 4 Endo      0.0951 0.00236     0.0218 0.0000000364 0.00810   0.0166  
## 5 Micro     0.0965 0.000147    0.865  0.0000000819 0.00491   0.00137 
## 6 Oligo     0.936  0.0183      0.939  0.937        0.643     0.586   
## 7 OPC       0.156  0.0253      0.0693 0.0166       0.731     0.000498

4.6.2 Plot

#keepn <- c("Exc", "Inh", "NonN_Astro_FGF3R", "NonN_Micro.Endo_TYROBP", "NonN_Oligo_MBP", "ASD_PGS", "SCZ_PGS", "AZD_PGS")
keepn <- c(ctpn, "ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS")

g <- ggduo(pgs_pheno_eur, ctpn, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS"), xlab = "CTPs", ylab = "PGS", title = "Brain CTPs vs Neuropsychiatric PGS")

#ggpairs(pgs_pheno_eur %>% dplyr::select(all_of(keepn)))

4.7 CTL only

4.7.1 Correlation

pgs_pheno_ctp_CTL.cor <- pgs_pheno_ctp_eur.long %>% filter(dx == "CTL") %>% group_by(Trait, celltype) %>% dplyr::summarise(r_pearson = cor.test(PGS, CTP)$estimate, p = cor.test(PGS, CTP)$p.value)
## `summarise()` has grouped output by 'Trait'. You can override using the
## `.groups` argument.
pgs_pheno_ctp_CTL.cor %>% dplyr::select(c("Trait", "celltype", "r_pearson")) %>% spread(Trait, r_pearson)
## # A tibble: 7 × 7
##   celltype ASD_PGS  SCZ_PGS AZD_PGS EduYears_PGS MDD_PGS Height_PGS
##   <fct>      <dbl>    <dbl>   <dbl>        <dbl>   <dbl>      <dbl>
## 1 Exc      -0.0655 -0.114   -0.0973       0.220  -0.0916    0.142  
## 2 Inh      -0.0557 -0.0452  -0.0770       0.149  -0.0178    0.0944 
## 3 Astro    -0.0779 -0.123   -0.107        0.159  -0.0470    0.133  
## 4 Endo      0.0345 -0.00145  0.0322      -0.224   0.0767   -0.0819 
## 5 Micro     0.0568  0.108    0.0910      -0.246   0.112    -0.103  
## 6 Oligo     0.0169  0.0908   0.0259       0.0211 -0.0345    0.00593
## 7 OPC       0.0546  0.0441   0.0760      -0.104   0.0333   -0.126
pgs_pheno_ctp_CTL.cor %>% dplyr::select(c("Trait", "celltype", "p")) %>% spread(Trait, p)
## # A tibble: 7 × 7
##   celltype ASD_PGS SCZ_PGS AZD_PGS EduYears_PGS MDD_PGS Height_PGS
##   <fct>      <dbl>   <dbl>   <dbl>        <dbl>   <dbl>      <dbl>
## 1 Exc       0.142  0.0104   0.0290 0.000000578   0.0398    0.00135
## 2 Inh       0.212  0.311    0.0843 0.000776      0.690     0.0340 
## 3 Astro     0.0805 0.00562  0.0161 0.000325      0.292     0.00275
## 4 Endo      0.440  0.974    0.470  0.000000361   0.0855    0.0661 
## 5 Micro     0.203  0.0149   0.0412 0.0000000217  0.0118    0.0204 
## 6 Oligo     0.705  0.0416   0.562  0.637         0.439     0.894  
## 7 OPC       0.221  0.323    0.0884 0.0190        0.456     0.00449

4.7.2 Plot

#keepn <- c("Exc", "Inh", "NonN_Astro_FGF3R", "NonN_Micro.Endo_TYROBP", "NonN_Oligo_MBP", "ASD_PGS", "SCZ_PGS", "AZD_PGS")
keepn <- c(ctpn, "ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS")

#ggpairs(pgs_pheno_eur %>% filter(dx == "CTL") %>% dplyr::select(all_of(keepn)))
g <- ggduo(pgs_pheno_eur %>% filter(dx == "CTL"), ctpn, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS"), xlab = "CTPs", ylab = "PGS", title = "Brain CTPs vs Neuropsychiatric PGS")